Statistical Models for Regional Tornado Studies

James B. Elsner, Thomas H. Jagger, and Holly M. Widen

## [1] "Sourcing CountyStatisticsSupport.R on Thu Apr 30 19:19:19 2015"
## [1] "loading packages"
## [1] "Version 1.2 Fri Dec  5 13:19:14 2014"

Tornado Data

The model is built using data from the SPC tornado dataset.

download.file("http://www.spc.noaa.gov/gis/svrgis/zipped/tornado.zip",
              "tornado.zip", mode = "wb")
unzip("tornado.zip",exdir="./tmp")
TornL = changeTornDataTypes(readOGR(dsn = "./tmp", layer = "tornado", 
                                    stringsAsFactors = FALSE))
## OGR data source with driver: ESRI Shapefile 
## Source: "./tmp", layer: "tornado"
## with 57988 features and 21 fields
## Feature type: wkbLineString with 2 dimensions

Boundaries

Get administrative boundaries. https://www.census.gov/geo/maps-data/data/cbf/cbf_counties.html. The 20m = 1:20,000,000 low resolution boundaries. Here we use the 5m = 1:5,000,000.

download.file("http://www2.census.gov/geo/tiger/GENZ2013/cb_2013_us_county_5m.zip",
              "cb_2013_us_county_5m.zip", mode = "wb")
unzip("cb_2013_us_county_5m.zip",exdir="./tmp")
US.sp = readOGR(dsn = "./tmp", layer = "cb_2013_us_county_5m", 
                                    stringsAsFactors = FALSE)
## OGR data source with driver: ESRI Shapefile 
## Source: "./tmp", layer: "cb_2013_us_county_5m"
## with 3234 features and 9 fields
## Feature type: wkbPolygon with 2 dimensions

Extract Kansas using the state Federal Information Processing Standard (FIPS).

FP = 20
AB = "KS"
ST.sp = US.sp[US.sp$STATEFP == FP, ]
nrow(ST.sp)
## [1] 105
county = paste(ST.sp$STATEFP, ST.sp$COUNTYFP, sep = "")
countiesun.sp = geometry(spChFIDs(ST.sp, county)) 
counties.sp = spTransform(countiesun.sp, CRS(proj4string(TornL)))
rgeos::gArea(counties.sp) / 10^6
## [1] 210845.3

Elevation Data

download.file(url = "http://www.viewfinderpanoramas.org/DEM/TIF15/15-H.zip",
              destfile = "15-H.ZIP", mode = "wb")
unzip("15-H.ZIP",exdir="./tmp")
Elev = raster("./tmp/15-H.tif")
Elev1 = as(crop(Elev, extent(countiesun.sp)), "SpatialGridDataFrame")
proj4string(Elev1) = proj4string(countiesun.sp) #same projection, different datum & ellipsoid
## Warning in ReplProj4string(obj, CRS(value)): A new CRS was assigned to an object with an existing CRS:
## +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0
## without reprojecting.
## For reprojection, use function spTransform in package rgdal
state.sp = unionSpatialPolygons(countiesun.sp, 
                             ID = rep("1", length(row.names(countiesun.sp))))
Elev2 = Elev1[state.sp, ]
range(Elev2$X15.H, na.rm = TRUE)
## [1]  211 1227

County Warning Areas

L="http://www.nws.noaa.gov/geodata/catalog/wsom/data/bp03de14.dbx"
L="bp03de14.dbx"
CWA = read.table(L, sep = "|", quote = "", colClasses = "character")
cwa.df = data.frame(county = CWA$V7[CWA$V7 %in% county], 
                    cwa = CWA$V3[CWA$V7 %in% county])
table(cwa.df$cwa)
## 
## DDC EAX GID GLD ICT SGF TOP 
##  27   7   6  13  26   3  23

County map with terrain and CWA definitions.

terrain = div_gradient_pal(low = muted("blue"), 
                            mid = "white", 
                            high = muted("orange"), 
                            space = "rgb")(seq(0, 1, length = 104))
t1 = list("sp.lines", as(countiesun.sp, "SpatialLines"), col = "white")
centers = as.data.frame(coordinates(countiesun.sp))
centers$county = row.names(centers)
centers2 = merge(centers, cwa.df, by = "county")
l = list("sp.text", as.matrix(centers2[, 2:3]), centers2$cwa,
          col = "black", cex = .6)
spplot(Elev2, col.regions = terrain,
       scales = list(draw = TRUE),
       at = seq(211, 1230, length = 104),
       sp.layout = list(t1, l),
       colorkey = list(space = "bottom"),
       par.settings = list(axis.line = list(col = NA)),
       sub = "Elevation (m)")

County Population Data

Use the population estimates from the Census Bureau from 1970-2012 see http://www.nber.org/data/census-intercensal-county-population.html. Use the 2012 value for 2013.

L = "county_population.csv"
pop0a = read.csv(L)
pop0 = subset(pop0a, state_fips == FP)
pop0$pop2013 = pop0$pop2012
pop0$pop2013[1]
## [1] 2885905
pop0$county = as.character(pop0$fips)
pop1 = pop0[-1, -(1:10)]
Pop.df = melt(pop1, id.vars = "county")
Pop.df$Year = as.numeric(substring(Pop.df$variable, first = 4, last = 7))
names(Pop.df)[3:4] = c("pop", "Year")
Pop.df$lpop = log10(Pop.df$pop)
Pop.df$ID = match(Pop.df$county, county) #generate spatial ID
Pop.df = Pop.df[order(Pop.df$ID), ] #order by spatial ID

Population changes by county.

PC = Pop.df %>%
  group_by(ID) %>%
  summarize(Change = (pop[Year == max(Year)] - pop[Year == min(Year)])/pop[Year == max(Year)] * 100)
PC.df = as.data.frame(PC)
row.names(PC.df) = county
spdf = SpatialPolygonsDataFrame(counties.sp, PC.df)
spdf$Name = ST.sp$NAME
area = rgeos::gArea(counties.sp, byid = TRUE)
spdf$area = area
range(spdf$Change)
## [1] -100.22981   60.69514
rng = seq(-125, 125, 25)
crq = brewer.pal(10, "BrBG")
l = list("sp.text", coordinates(spdf), spdf$Name,
          col = "black", cex = .6)
spplot(spdf, "Change", col = "white", at = rng, 
       col.regions = crq,
       colorkey = list(space = "bottom", labels = paste(rng)),
       sp.layout = list(l),
       par.settings = list(axis.line = list(col = NA)),
       sub = "% Population Change (1970 to 2012)")

Latest population values by county are needed for correlation and choropleth. The log of the population density is the common log.

poplatest = subset(Pop.df, Year == max(Year))
spdf$pop = poplatest$pop
spdf$Lpop = log10(spdf$pop)
spdf$density = spdf$pop/spdf$area * 10^6
spdf$Ldensity = log10(spdf$density)
range(spdf$Ldensity)
## [1] -0.1883226  2.6577841
rng = seq(-.2, 2.8, .6)
crq = brewer.pal(6, "Blues")
labs = as.character(round(10^rng, 0))
spplot(spdf, "Ldensity", col = "white", at = rng, 
       col.regions = crq,
       colorkey = list(space = "bottom", labels = labs),
       sp.layout = list(l),
       par.settings = list(axis.line = list(col = NA)),
       sub = "2012 Population Density [persons per square km]")

sort(spdf$density, decreasing = TRUE)[1:3]
## [1] 454.7620 398.9293 194.4824

Subset tornado data

Only EF1+ tornadoes since 1970. Overlay tornado tracks on boundaries. Each element of the list is a county subset of the tornado track file. The data frame nTor.df contains the per-county tornado count, area, and rate.

The TracksAll.df data frame contains the tornado track attributes listed by county. Track information is repeated for each county in cases where the track intersects more than one county.

TornL2 = subset(TornL, Year >= 1970 & EF >= 1)
ct = over(counties.sp, TornL2, returnList = TRUE)
names(ct) = county
nT = sapply(ct, function(x) nrow(x))
nTd = sapply(ct, function(x) length(unique(x$Date)))
Nyears = diff(range(TornL2$Year)) + 1
nTor.df = data.frame(state = AB, county = county, Nyears =  Nyears,
                     nT = nT, area = area/1000000, 
                     nTd = nTd,
                     rate = nT/area * 10^6 * 10000 / Nyears,
                     stringsAsFactors = FALSE, ID = 1:length(county))
spdf$state = AB
spdf$county = county
spdf$Nyears = nTor.df$Nyears
spdf$nT = nTor.df$nT
spdf$nTd = nTor.df$nTd
spdf$area = nTor.df$area
spdf$rate = nTor.df$rate
TracksAll.df = ldply(ct, data.frame)
colnames(TracksAll.df)[1] = "county"
cor.test(spdf$nT, spdf$area, conf.level = .9)
## 
##  Pearson's product-moment correlation
## 
## data:  spdf$nT and spdf$area
## t = 3.7244, df = 103, p-value = 0.0003198
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
##  0.1938519 0.4792929
## sample estimates:
##       cor 
## 0.3445102
cor.test(spdf$nT, spdf$pop, conf.level = .9)
## 
##  Pearson's product-moment correlation
## 
## data:  spdf$nT and spdf$pop
## t = 0.4397, df = 103, p-value = 0.6611
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
##  -0.1189857  0.2033049
## sample estimates:
##        cor 
## 0.04328566

Area is now given in square km. Rate is tornadoes per sq m per 10,000 years. The most counties affected by a single tornado is 7.

Annual Counts by County

The data frame nTor.year is the number of tornadoes affecting the state by year.

nTor.year = TracksAll.df %>%
  filter(!duplicated(SeqID)) %>%
  group_by(Year) %>%
  summarize(nT = n())
nTor.year = as.data.frame(nTor.year)
nTor.year$Year2 = nTor.year$Year
sum(nTor.year$nT)
## [1] 1010
ggplot(nTor.year[nTor.year$Year > 1996, ], aes(x = factor(Year), y = nT)) + 
  geom_bar(stat = 'identity', fill = "gray") +
  geom_text(aes(label = nT), vjust = -.5, size = 2, color = "darkblue") +
  xlab("Year") + ylab("Annual Number of EF1+ Tornadoes in Kansas\nSince the Release of the Movie Twister in 1996") +
  theme(axis.text.x  = element_text(size = 11), 
        legend.position = "none")

The data frame nTor.day0 contains the tornado county and tornado day county by year and county.

nTor.day0 = ddply(TracksAll.df, .(county, Year), summarize, 
                  nT = length(county), 
                  nTd = length(unique(Date)), .drop = FALSE)
nTor.day = merge(nTor.df[c("state", "county", "area", "ID")],
                 nTor.day0)

Choropleth map of tornado counts & tornado days.

crq = wes.palette(name = "Zissou", type = "continuous")
range(spdf$nT)
rng = seq(0, 30, 5)
spplot(spdf, "nT", col = "white", at = rng, 
       col.regions = crq(8),
       colorkey = list(space = "bottom", labels = paste(rng)),
       par.settings = list(axis.line = list(col = NA)),
       sub = "Number of EF1+ Tornadoes (1970-2013)")

crq = brewer.pal(5, "GnBu")
range(spdf$nTd)
rng = seq(0, 20, 4)
spplot(spdf, "nTd", col = "white", at = rng, 
       col.regions = crq,
       colorkey = list(space = "bottom", labels = paste(rng)),
       par.settings = list(axis.line = list(col = NA)),
       sub = "Number of EF1+ Tornado Days (1970-2013)")

A function to plot tracks and county counts using ggmap.

crq = wes.palette(name = "Zissou", type = "continuous")
plotTorn(x = spdf, tracks = TracksAll.df) +
  scale_fill_gradientn("EF1+\nTornado Reports\n(1970-2013)", 
                       colours = crq(10))

On average the larger counties have more tornadoes.

Merge tornado and population data.

popNtor = merge(Pop.df, nTor.day, all = FALSE)
popNtor$density = with(popNtor, pop/area)
popNtor$Ldensity = log2(popNtor$density)
popNtor$Year2 = popNtor$Year

Additional year column (Year2) needed for the models.

Models for county-level tornado counts

Spatial neighborhood definition as an inla graph

nb = poly2nb(spdf)
nb2INLA("tornb.inla", nb)
tornb.inla = inla.read.graph("tornb.inla")

The area times the Nyears indicates the square meter years exposed to tornadoes (E). Scale the exposure to have a mean of unity. The model has a spatial id. E is scaled to have a mean of one. constr = constant rate. The DIC measures how well the model fits the data; the larger this is, the worse the fit. Year is centered on 1991.

Model with spatial term. Determine the model that results in the lowest DIC. Then compare this model with a model where the spatial term is removed.

formula = nT ~ f(ID, model = "besag", graph = tornb.inla, constr = TRUE) + 
               Ldensity + Ldensity:I(Year - 1991) + I(Year - 1991) +
               f(Year2, model = "iid")
model = inla(formula = formula, family = "nbinomial", E = area/2000,
             data = popNtor,
             quantiles = c(.05, .5, .95),
             control.compute = control$compute,
             control.predictor = control$predictor,
             control.results = control$results,
             control.family = control$family
             )
summary(model)
## 
## Call:
## c("inla(formula = formula, family = \"nbinomial\", data = popNtor, ",  "    quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ",  "    control.predictor = control$predictor, control.family = control$family, ",  "    control.results = control$results)")
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          0.2258          8.5270          0.3366          9.0894 
## 
## Fixed effects:
##                            mean     sd 0.05quant 0.5quant 0.95quant
## (Intercept)             -1.7651 0.1225   -1.9686  -1.7640   -1.5655
## Ldensity                 0.1187 0.0324    0.0655   0.1186    0.1723
## I(Year - 1991)           0.0189 0.0082    0.0054   0.0189    0.0323
## Ldensity:I(Year - 1991) -0.0045 0.0017   -0.0073  -0.0045   -0.0017
##                            mode kld
## (Intercept)             -1.7618   0
## Ldensity                 0.1183   0
## I(Year - 1991)           0.0188   0
## Ldensity:I(Year - 1991) -0.0045   0
## 
## Random effects:
## Name   Model
##  ID   Besags ICAR model 
## Year2   IID model 
## 
## Model hyperparameters:
##                                                        mean     sd
## size for the nbinomial observations (overdispersion) 0.4774 0.0421
## Precision for ID                                     5.9720 2.7635
## Precision for Year2                                  3.4524 0.9368
##                                                      0.05quant 0.5quant
## size for the nbinomial observations (overdispersion)    0.4119   0.4753
## Precision for ID                                        2.7919   5.3500
## Precision for Year2                                     2.1282   3.3415
##                                                      0.95quant  mode
## size for the nbinomial observations (overdispersion)    0.5501 0.471
## Precision for ID                                       11.2501 4.351
## Precision for Year2                                     5.1607 3.134
## 
## Expected number of effective parameters(std dev): 65.81(7.162)
## Number of equivalent replicates : 71.80 
## 
## Deviance Information Criterion: 5990.50
## Effective number of parameters: 67.44
## 
## Marginal Likelihood:  -3140.65 
## CPO and PIT are computed
## 
## Posterior marginals for linear predictor and fitted values computed

Model w/out spatial term

formula0 = nT ~ Ldensity + Ldensity:I(Year - 1991) + I(Year - 1991) +
                f(Year2, model = "iid")
model0 = inla(formula = formula0, family = "nbinomial", E = area/2000,
              data = popNtor,
              quantiles = c(.05, .5, .95),
              control.compute = control$compute,
              control.predictor = control$predictor,
              control.results = control$results,
              control.family = control$family
              )
summary(model0)
## 
## Call:
## c("inla(formula = formula0, family = \"nbinomial\", data = popNtor, ",  "    quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ",  "    control.predictor = control$predictor, control.family = control$family, ",  "    control.results = control$results)")
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          0.1980          5.2306          0.2719          5.7005 
## 
## Fixed effects:
##                            mean     sd 0.05quant 0.5quant 0.95quant
## (Intercept)             -1.6524 0.1071   -1.8302  -1.6514   -1.4779
## Ldensity                 0.0903 0.0220    0.0540   0.0902    0.1266
## I(Year - 1991)           0.0192 0.0082    0.0056   0.0192    0.0328
## Ldensity:I(Year - 1991) -0.0045 0.0017   -0.0073  -0.0045   -0.0017
##                            mode kld
## (Intercept)             -1.6495   0
## Ldensity                 0.0902   0
## I(Year - 1991)           0.0192   0
## Ldensity:I(Year - 1991) -0.0045   0
## 
## Random effects:
## Name   Model
##  Year2   IID model 
## 
## Model hyperparameters:
##                                                        mean     sd
## size for the nbinomial observations (overdispersion) 0.4319 0.0359
## Precision for Year2                                  3.3579 0.9090
##                                                      0.05quant 0.5quant
## size for the nbinomial observations (overdispersion)    0.3759   0.4301
## Precision for Year2                                     2.0771   3.2478
##                                                      0.95quant   mode
## size for the nbinomial observations (overdispersion)    0.4938 0.4264
## Precision for Year2                                     5.0194 3.0409
## 
## Expected number of effective parameters(std dev): 38.41(1.562)
## Number of equivalent replicates : 123.01 
## 
## Deviance Information Criterion: 6027.30
## Effective number of parameters: 39.54
## 
## Marginal Likelihood:  -3069.89 
## CPO and PIT are computed
## 
## Posterior marginals for linear predictor and fitted values computed

Model diagnostics. The predictive quality of the model is assessed by the cross-validated log score. A smaller value of the score indicates better prediction quality (Gneiting and Raftery 2007).

modPIT = model$cpo$pit - runif(length(model$cpo$cpo)) * model$cpo$cpo
goftest::ad.test(modPIT)
## 
##  Anderson-Darling test of goodness-of-fit
##  Null hypothesis: uniform distribution
## 
## data:  modPIT
## An = 0.4042, p-value = 0.8445
-mean(log(model$cpo$cpo))
## [1] 0.6346069
cor.test(popNtor$nT, model$summary.fitted.values$mean, conf.level = .9)
## 
##  Pearson's product-moment correlation
## 
## data:  popNtor$nT and model$summary.fitted.values$mean
## t = 19.4748, df = 4723, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
##  0.2503429 0.2946511
## sample estimates:
##       cor 
## 0.2726415

Brier score

brier.score <- function(x, m){
  with(m, {mean(x^2) - 2 * mean(x * mean) + mean(mean^2 + sd^2)})
}
brier.score(popNtor[["nT"]], model[["summary.fitted.values"]])
## [1] 0.5696539

Test case for PIT

ggplot() + 
  geom_histogram(aes(x = modPIT), color = "white",
                 fill = "grey", binwidth = .05) + 
  xlab("Probability") +
  ylab("Frequency") +
  ggtitle("Distribution of modified PIT") #looks good!

The spatial random term by region (county) is given in the data frame model\(summary.random\)ID.

spdf$RandomMean = model$summary.random$ID$mean
spdf$RandomQ50 = model$summary.random$ID$'0.5quant'
spdf$sd = model$summary.random$ID$sd
df = model$summary.random$ID
names(df) = c("ID", "mean", "sd", "QL", "QM", "QH", "mode", "kld")
df$QL = (exp(df$QL) - 1) * 100
df$QM = (exp(df$QM) - 1) * 100
df$QH = (exp(df$QH) - 1) * 100
df$Sig = sign(df$QL) == sign(df$QH)
table(df$Sig)
## 
## FALSE  TRUE 
##    82    23
spdf$Sig = df$Sig

Plot function for the random-effect term.

CRS0 = CRS("+proj=longlat +datum=WGS84")
bc = getBordersAndCenters(spdf, CRS0)
df2 = bc$centers
df3 = cbind(df, df2)
names(df3)[5] = 'newfield'
borders.df = merge(bc$borders, df3[c("county", "newfield", "Sig")], by = "county")
borders2.df = borders.df[borders.df$Sig, ]
ggplotSpdata(spdf, "RandomQ50", tfun = function(x) round((exp(x) - 1) * 100)) +
        scale_fill_gradient2("Tornado Occurrence\n[% of State Avg]", 
            low = muted("blue"), mid = "white", high = muted("red"),
            midpoint = 0, space = "rgb", na.value = "grey50", guide = "colourbar") +
  geom_polygon(data = borders2.df[borders2.df$newfield > 0, ],
               aes(x = long, y = lat, group = group, fill = newfield),
               color = muted("red"), size = 2) +
  geom_polygon(data = borders2.df[borders2.df$newfield < 0, ],
               aes(x = long, y = lat, group = group, fill = newfield),
               color = muted("blue"), size = 2) +
  geom_text(data = df3[df3$Sig,], 
              aes(x = long, y = lat, group = NULL, label = round(newfield)), 
              vjust = .5, size = 3, color = 'black')

Marginal standard deviations

CRS0 = CRS("+proj=longlat +datum=WGS84")
bc = getBordersAndCenters(spdf, CRS0)
df2 = bc$centers
df3 = cbind(df, df2)
names(df3)[3] = 'newfield'
borders.df = merge(bc$borders, df3[c("county", "newfield", "Sig")], by = "county")
borders2.df = borders.df[borders.df$Sig, ]
ggplotSpdata(spdf, "sd", tfun = function(x) round((exp(x) - 1) * 100)) +
        scale_fill_gradient2("Random Effect S.D.\n[% of State Avg]", 
            low = muted("yellow"), mid = "white", high = muted("green"),
            midpoint = 0, space = "rgb", na.value = "grey50", guide = "colourbar") +
    geom_text(data = df3, 
              aes(x = long, y = lat, group = NULL, label = round((exp(newfield) - 1) * 100)), 
              vjust = .5, size = 3, color = 'white')

Marginal s.d. tend to be lower (precision higher) in interior counties having a larger number of neighbors.

Diagnostic plots.

plot(model)

Surface roughness and low-level inflow hypothesis

Elev.data = over(countiesun.sp, Elev1, returnList = TRUE)
Elev.df = data.frame(county = rep(county, sapply(Elev.data, nrow)),
                     Elev = unlist(Elev.data), 
                     ID = rep(spdf$ID, sapply(Elev.data, nrow)),
                     stringsAsFactors = FALSE)
t.test(Elev.df$Elev, conf.level = .9)
## 
##  One Sample t-test
## 
## data:  Elev.df$Elev
## t = 2614.083, df = 1267820, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 90 percent confidence interval:
##  580.5066 581.2376
## sample estimates:
## mean of x 
##  580.8721
CE.df = Elev.df %>%
  group_by(ID) %>%
  summarize(elev = mean(Elev, na.rm = TRUE),
            elevS = sd(Elev, na.rm = TRUE),
            elevK = kurtosis(Elev, na.rm = TRUE),
            elevCV = elevS/elev)
all(spdf$ID == CE.df$ID)
## [1] TRUE
spdf@data[colnames(CE.df)] = CE.df
popNtor2 = merge(popNtor, CE.df, by = "ID")
cor.test(spdf$nT, spdf$elevS, conf.level = .9)
## 
##  Pearson's product-moment correlation
## 
## data:  spdf$nT and spdf$elevS
## t = -0.6865, df = 103, p-value = 0.4939
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
##  -0.22646565  0.09498148
## sample estimates:
##         cor 
## -0.06749335
range(spdf$elevS)
rng = seq(10, 80, 10)
crq = brewer.pal(7, "YlOrBr")
spplot(spdf, "elevS", col = "white", at = rng, 
       col.regions = crq,
       colorkey = list(space = "bottom"),
       par.settings = list(axis.line = list(col = NA)),
       sub = "Elevation S.D.[m]")

Function for plotting the density of the marginal term.

formula = nT ~ elevS +
               f(ID, model = "besag", graph = tornb.inla, constr = TRUE) + 
                Ldensity + Ldensity:I(Year - 1991) + I(Year - 1991) +
                f(Year2, model = "iid")
model = inla(formula = formula, family = "nbinomial", E = area/2000,
             data = popNtor2,
             quantiles = c(.05, .5, .95),
             control.compute = control$compute,
             control.predictor = control$predictor,
             control.results = control$results,
             control.family = control$family
             )
summary(model)
## 
## Call:
## c("inla(formula = formula, family = \"nbinomial\", data = popNtor2, ",  "    quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ",  "    control.predictor = control$predictor, control.family = control$family, ",  "    control.results = control$results)")
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          0.2914          8.7102          0.4299          9.4316 
## 
## Fixed effects:
##                            mean     sd 0.05quant 0.5quant 0.95quant
## (Intercept)             -1.0846 0.2143   -1.4377  -1.0844   -0.7321
## elevS                   -0.0186 0.0049   -0.0268  -0.0186   -0.0106
## Ldensity                 0.0717 0.0339    0.0162   0.0716    0.1277
## I(Year - 1991)           0.0181 0.0082    0.0046   0.0181    0.0316
## Ldensity:I(Year - 1991) -0.0043 0.0017   -0.0071  -0.0043   -0.0015
##                            mode kld
## (Intercept)             -1.0839   0
## elevS                   -0.0185   0
## Ldensity                 0.0713   0
## I(Year - 1991)           0.0181   0
## Ldensity:I(Year - 1991) -0.0043   0
## 
## Random effects:
## Name   Model
##  ID   Besags ICAR model 
## Year2   IID model 
## 
## Model hyperparameters:
##                                                       mean     sd
## size for the nbinomial observations (overdispersion) 0.483 0.0426
## Precision for ID                                     7.079 3.2309
## Precision for Year2                                  3.438 0.9375
##                                                      0.05quant 0.5quant
## size for the nbinomial observations (overdispersion)    0.4168   0.4808
## Precision for ID                                        3.3150   6.3701
## Precision for Year2                                     2.1276   3.3192
##                                                      0.95quant   mode
## size for the nbinomial observations (overdispersion)    0.5567 0.4761
## Precision for ID                                       13.2296 5.2172
## Precision for Year2                                     5.1569 3.0951
## 
## Expected number of effective parameters(std dev): 63.76(6.582)
## Number of equivalent replicates : 74.10 
## 
## Deviance Information Criterion: 5979.79
## Effective number of parameters: 65.32
## 
## Marginal Likelihood:  -3142.25 
## CPO and PIT are computed
## 
## Posterior marginals for linear predictor and fitted values computed
spdf$RandomMean = model$summary.random$ID$mean
spdf$RandomQ50 = model$summary.random$ID$'0.5quant'
spdf$sd = model$summary.random$ID$sd
df = model$summary.random$ID
names(df) = c("ID", "mean", "sd", "QL", "QM", "QH", "mode", "kld")
df$QL = (exp(df$QL) - 1) * 100
df$QM = (exp(df$QM) - 1) * 100
df$QH = (exp(df$QH) - 1) * 100
df$Sig = sign(df$QL) == sign(df$QH)
table(df$Sig)
## 
## FALSE  TRUE 
##    75    30
spdf$Sig = df$Sig
CRS0 = CRS("+proj=longlat +datum=WGS84")
bc = getBordersAndCenters(spdf, CRS0)
df2 = bc$centers
df3 = cbind(df, df2)
names(df3)[5] = 'newfield'
borders.df = merge(bc$borders, df3[c("county", "newfield", "Sig")], by = "county")
borders2.df = borders.df[borders.df$Sig, ]
ggplotSpdata(spdf, "RandomQ50", tfun = function(x) round((exp(x) - 1) * 100)) +
        scale_fill_gradient2("Tornado Occurrence\n[% of State Avg]", 
            low = muted("blue"), mid = "white", high = muted("red"),
            midpoint = 0, space = "rgb", na.value = "grey50", guide = "colourbar") +
  geom_polygon(data = borders2.df[borders2.df$newfield > 0, ],
               aes(x = long, y = lat, group = group, fill = newfield),
               color = muted("red"), size = 2) +
  geom_polygon(data = borders2.df[borders2.df$newfield < 0, ],
               aes(x = long, y = lat, group = group, fill = newfield),
               color = muted("blue"), size = 2) +
  geom_text(data = df3[df3$Sig,], 
              aes(x = long, y = lat, group = NULL, label = round(newfield)), 
              vjust = .5, size = 3, color = 'black')

Model diagnostics

modPIT = model$cpo$pit - runif(length(model$cpo$cpo)) * model$cpo$cpo
goftest::ad.test(modPIT)
## 
##  Anderson-Darling test of goodness-of-fit
##  Null hypothesis: uniform distribution
## 
## data:  modPIT
## An = 0.8594, p-value = 0.4401
-mean(log(model$cpo$cpo))
## [1] 0.6334402
cor.test(popNtor$nT, model$summary.fitted.values$mean, conf.level = .9)
## 
##  Pearson's product-moment correlation
## 
## data:  popNtor$nT and model$summary.fitted.values$mean
## t = 12.1733, df = 4723, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
##  0.1511163 0.1975253
## sample estimates:
##       cor 
## 0.1744177
ggplotmargin(model, type = "fixed", effect = "elevS", xlab = "") +  
  xlab(expression("Elevation S.D. Coefficient"~"["*m^{-1}*"]"))

County warning area as a significant factor

cwa.df$DDC = cwa.df$cwa == "DDC"
cwa.df$GLD = cwa.df$cwa == "GLD"
cwa.df$GID = cwa.df$cwa == "GID"
cwa.df$TOP = cwa.df$cwa == "TOP"
cwa.df$ICT = cwa.df$cwa == "ICT"
cwa.df$EAX = cwa.df$cwa == "EAX"
cwa.df$SGF = cwa.df$cwa == "SGF"
popNtor3 = merge(popNtor2, cwa.df, by = "county")
formula = nT ~ elevS + I(DDC + GID) + 
               f(ID, model = "besag", graph = tornb.inla, constr = TRUE) + 
               Ldensity + Ldensity:I(Year - 1991) + I(Year - 1991) +
               f(Year2, model = "iid")
model = inla(formula = formula, family = "nbinomial", E = area/2000,
             data = popNtor3,
             quantiles = c(.05, .5, .95),
             control.compute = control$compute,
             control.predictor = control$predictor,
             control.results = control$results,
             control.family = control$family
              )
summary(model)
## 
## Call:
## c("inla(formula = formula, family = \"nbinomial\", data = popNtor3, ",  "    quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ",  "    control.predictor = control$predictor, control.family = control$family, ",  "    control.results = control$results)")
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          0.3153         10.3534          0.8006         11.4693 
## 
## Fixed effects:
##                            mean     sd 0.05quant 0.5quant 0.95quant
## (Intercept)             -1.2503 0.2093   -1.5947  -1.2504   -0.9055
## elevS                   -0.0182 0.0046   -0.0259  -0.0182   -0.0107
## I(DDC + GID)             0.4112 0.1163    0.2185   0.4120    0.6011
## Ldensity                 0.0839 0.0319    0.0316   0.0838    0.1366
## I(Year - 1991)           0.0186 0.0082    0.0051   0.0186    0.0320
## Ldensity:I(Year - 1991) -0.0045 0.0017   -0.0073  -0.0045   -0.0017
##                            mode kld
## (Intercept)             -1.2507   0
## elevS                   -0.0181   0
## I(DDC + GID)             0.4137   0
## Ldensity                 0.0837   0
## I(Year - 1991)           0.0186   0
## Ldensity:I(Year - 1991) -0.0044   0
## 
## Random effects:
## Name   Model
##  ID   Besags ICAR model 
## Year2   IID model 
## 
## Model hyperparameters:
##                                                         mean      sd
## size for the nbinomial observations (overdispersion)  0.4815  0.0424
## Precision for ID                                     18.6667 16.8629
## Precision for Year2                                   3.4526  0.9365
##                                                      0.05quant 0.5quant
## size for the nbinomial observations (overdispersion)    0.4154   0.4794
## Precision for ID                                        5.2754  13.6081
## Precision for Year2                                     2.1302   3.3411
##                                                      0.95quant  mode
## size for the nbinomial observations (overdispersion)    0.5547 0.475
## Precision for ID                                       48.5881 8.449
## Precision for Year2                                     5.1616 3.132
## 
## Expected number of effective parameters(std dev): 56.28(7.203)
## Number of equivalent replicates : 83.95 
## 
## Deviance Information Criterion: 5976.85
## Effective number of parameters: 58.43
## 
## Marginal Likelihood:  -3142.01 
## CPO and PIT are computed
## 
## Posterior marginals for linear predictor and fitted values computed

Model diagnostics

modPIT = model$cpo$pit - runif(length(model$cpo$cpo)) * model$cpo$cpo
goftest::ad.test(modPIT)
## 
##  Anderson-Darling test of goodness-of-fit
##  Null hypothesis: uniform distribution
## 
## data:  modPIT
## An = 0.1926, p-value = 0.9923
-mean(log(model$cpo$cpo))
## [1] 0.6330331
cor.test(popNtor$nT, model$summary.fitted.values$mean, conf.level = .9)
## 
##  Pearson's product-moment correlation
## 
## data:  popNtor$nT and model$summary.fitted.values$mean
## t = 19.1046, df = 4723, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
##  0.2454750 0.2899076
## sample estimates:
##       cor 
## 0.2678337
brier.score(popNtor[["nT"]], model[["summary.fitted.values"]])
## [1] 0.5691174

Test of major highway or not in the county. U.S. highway or interstate.

hiWays = read.table("MajorHiWays_KS.txt", header = TRUE,
                    stringsAsFactors = FALSE)
hiWays$county = as.character(hiWays$county)
popNtor4 = merge(popNtor3, hiWays, by = "county")
formula = nT ~ elevS + DDC + GID + highway +
               f(ID, model = "besag", graph = tornb.inla, constr = TRUE) + 
               Ldensity * I(Year - 1991) + 
               f(Year2, model = "iid")
model = inla(formula = formula, family = "nbinomial", E = area/2000,
             data = popNtor4,
                 quantiles = c(.05, .5, .95),
                 control.compute = control$compute,
                 control.predictor = control$predictor,
                 control.results = control$results,
                 control.family = control$family
                 )
summary(model)
## 
## Call:
## c("inla(formula = formula, family = \"nbinomial\", data = popNtor4, ",  "    quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ",  "    control.predictor = control$predictor, control.family = control$family, ",  "    control.results = control$results)")
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          0.3365         12.9222          0.3977         13.6564 
## 
## Fixed effects:
##                            mean     sd 0.05quant 0.5quant 0.95quant
## (Intercept)             -1.2251 0.2085   -1.5683  -1.2253   -0.8815
## elevS                   -0.0196 0.0047   -0.0273  -0.0196   -0.0120
## DDCTRUE                  0.3631 0.1256    0.1542   0.3644    0.5670
## GIDTRUE                  0.6203 0.1898    0.3023   0.6238    0.9260
## highway                  0.1149 0.0876   -0.0300   0.1153    0.2583
## Ldensity                 0.0685 0.0332    0.0141   0.0683    0.1236
## I(Year - 1991)           0.0184 0.0082    0.0050   0.0184    0.0319
## Ldensity:I(Year - 1991) -0.0044 0.0017   -0.0072  -0.0044   -0.0016
##                            mode kld
## (Intercept)             -1.2255   0
## elevS                   -0.0195   0
## DDCTRUE                  0.3672   0
## GIDTRUE                  0.6313   0
## highway                  0.1162   0
## Ldensity                 0.0678   0
## I(Year - 1991)           0.0184   0
## Ldensity:I(Year - 1991) -0.0044   0
## 
## Random effects:
## Name   Model
##  ID   Besags ICAR model 
## Year2   IID model 
## 
## Model hyperparameters:
##                                                         mean      sd
## size for the nbinomial observations (overdispersion)  0.4795  0.0422
## Precision for Year2                                   3.4336  0.9334
## Precision for ID                                     35.3467 56.7188
##                                                      0.05quant 0.5quant
## size for the nbinomial observations (overdispersion)    0.4136   0.4775
## Precision for Year2                                     2.1216   3.3190
## Precision for ID                                        6.0900  19.2662
##                                                      0.95quant   mode
## size for the nbinomial observations (overdispersion)    0.5522 0.4734
## Precision for Year2                                     5.1417 3.1033
## Precision for ID                                      113.6871 9.1985
## 
## Expected number of effective parameters(std dev): 55.68(7.904)
## Number of equivalent replicates : 84.86 
## 
## Deviance Information Criterion: 5979.78
## Effective number of parameters: 58.28
## 
## Marginal Likelihood:  -3151.54 
## CPO and PIT are computed
## 
## Posterior marginals for linear predictor and fitted values computed

Illinois

Borders

FP = 17
AB = "IL"
ST.sp = US.sp[US.sp$STATEFP == FP, ]
nrow(ST.sp)
## [1] 102
county = paste(ST.sp$STATEFP, ST.sp$COUNTYFP, sep = "")
countiesun.sp = geometry(spChFIDs(ST.sp, county)) 
counties.sp = spTransform(countiesun.sp, CRS(proj4string(TornL)))
rgeos::gArea(counties.sp) / 10^6
## [1] 144450.6

Elevation

Elev1 = as(crop(Elev, extent(countiesun.sp)), "SpatialGridDataFrame")
proj4string(Elev1) = proj4string(countiesun.sp) #same projection, different datum & ellipsoid
## Warning in ReplProj4string(obj, CRS(value)): A new CRS was assigned to an object with an existing CRS:
## +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0
## without reprojecting.
## For reprojection, use function spTransform in package rgdal

CWA

cwa.df = data.frame(county = CWA$V7[CWA$V7 %in% county], 
                    cwa = CWA$V3[CWA$V7 %in% county])

Population

pop0 = subset(pop0a, state_fips == FP)
pop0$pop2013 = pop0$pop2012
pop0$pop2013[1]
## [1] 12875255
pop0$county = as.character(pop0$fips)
pop1 = pop0[-1, -(1:10)]
Pop.df = melt(pop1, id.vars = "county")
Pop.df$Year = as.numeric(substring(Pop.df$variable, first = 4, last = 7))
names(Pop.df)[3:4] = c("pop", "Year")
Pop.df$lpop = log10(Pop.df$pop)
Pop.df$ID = match(Pop.df$county, county) #generate spatial ID
Pop.df = Pop.df[order(Pop.df$ID), ]
PC = Pop.df %>%
  group_by(ID) %>%
  summarize(Change = (pop[Year == max(Year)] - pop[Year == min(Year)])/pop[Year == max(Year)] * 100)
PC.df = as.data.frame(PC)
row.names(PC.df) = county
spdf = SpatialPolygonsDataFrame(counties.sp, PC.df)
spdf$Name = ST.sp$NAME
area = rgeos::gArea(counties.sp, byid = TRUE)
spdf$area = area
poplatest = subset(Pop.df, Year == max(Year))
spdf$pop = poplatest$pop
spdf$Lpop = log10(spdf$pop)
spdf$density = spdf$pop/spdf$area * 10^6
spdf$Ldensity = log10(spdf$density)

Tornadoes

ct = over(counties.sp, TornL2, returnList = TRUE)
names(ct) = county
nT = sapply(ct, function(x) nrow(x))
Nyears = diff(range(TornL2$Year)) + 1
nTor.df = data.frame(state = AB, county = county, Nyears =  Nyears,
                     nT = nT, area = area/1000000, 
                     rate = nT/area * 10^6 * 10000 / Nyears,
                     stringsAsFactors = FALSE, ID = 1:length(county))
spdf$state = AB
spdf$county = county
spdf$Nyears = nTor.df$Nyears
spdf$nT = nTor.df$nT
spdf$area = nTor.df$area
spdf$rate = nTor.df$rate
TracksAll.df = ldply(ct, data.frame)
colnames(TracksAll.df)[1] = "county"
cor.test(spdf$nT, spdf$area, conf.level = .9)
## 
##  Pearson's product-moment correlation
## 
## data:  spdf$nT and spdf$area
## t = 8.2204, df = 100, p-value = 7.612e-13
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
##  0.525911 0.723573
## sample estimates:
##       cor 
## 0.6350236
cor.test(spdf$nT, spdf$pop, conf.level = .9)
## 
##  Pearson's product-moment correlation
## 
## data:  spdf$nT and spdf$pop
## t = 1.4921, df = 100, p-value = 0.1388
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
##  -0.01664851  0.30405167
## sample estimates:
##       cor 
## 0.1475784
nTor.year = TracksAll.df %>%
  filter(!duplicated(SeqID)) %>%
  group_by(Year) %>%
  summarize(nT = n())
nTor.year = as.data.frame(nTor.year)
nTor.year$Year2 = nTor.year$Year
sum(nTor.year$nT)
## [1] 879
nTor.day0 = ddply(TracksAll.df, .(county, Year), summarize, 
                  nT = length(county), 
                  nTd = length(unique(Date)), .drop = FALSE)
nTor.day = merge(nTor.df[c("state", "county", "area", "ID")],
                 nTor.day0)
popNtor = merge(Pop.df, nTor.day, all = FALSE)
popNtor$density = with(popNtor, pop/area)
popNtor$Ldensity = log2(popNtor$density)
popNtor$Year2 = popNtor$Year

Raw counts and tracks

p4s = "+proj=lcc +lat_1=33 +lat_2=45 +lat_0=40.07 +lon_0=-89.2 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0"
spdf = spTransform(spdf, CRS(p4s))
crq = wes.palette(name = "Zissou", type = "continuous")
p1IL = plotTorn(x = spdf, tracks = TracksAll.df) +
         scale_fill_gradientn("EF1+\nTornado Reports\n(1970-2013)", 
                              colours = crq(10))
#crq = wes.palette(name = "Zissou", type = "continuous")
#range(spdf$nT)
#rng = seq(0, 40, 5)
#p1IL = spplot(spdf, "nT", col = "white", at = rng, 
#         col.regions = crq(8),
#         colorkey = list(space = "bottom", labels = paste(rng)),
#         par.settings = list(axis.line = list(col = NA)),
#         sub = "EF1+ Tornado Reports (1970-2013)")

Linear Trend (%/yr)

formula = nT ~ Year
model = inla(formula = formula, 
                 family = "nbinomial", 
                 quantiles = c(.05, .5, .95), 
                 data = nTor.year,
                 control.compute = control$compute,
                 control.predictor = control$predictor)
TM = model$summary.fixed$mean[2]
TL =  model$summary.fixed$'0.05quant'[2]
TH =  model$summary.fixed$'0.95quant'[2]
(exp(TM) - 1) * 100
## [1] 0.4817024
(exp(TL) - 1) * 100
## [1] -0.7730836
(exp(TH) - 1) * 100
## [1] 1.751856

Nonlinear trend

formula = nT ~ Year + f(Year2, model = "rw2")
model = inla(formula = formula, 
                     family = "nbinomial", 
                     quantiles = c(.05, .5, .95), 
                     data = nTor.year,
                     control.compute = control$compute,
                     control.predictor = control$predictor)
out = data.frame(Year = nTor.year$Year,
                 QL = model$summary.fitted.values$"0.05quant",
                 QM = model$summary.fitted.values$"0.5quant",
                 QH = model$summary.fitted.values$"0.95quant",
                 nT = nTor.year$nT,
                 ST = AB)
p2IL = ggplot(out, aes(x = Year, y = QM)) +
  geom_line(color = 'blue') +
  geom_ribbon(aes(x = Year, ymax = QH, ymin = QL), color = "gray", alpha = .25) +
  geom_point(aes(x = Year, y = nT)) +
  ylab("Number of EF1+ Tornadoes") +
  theme_bw()

Spatial neighborhood definition as an inla graph

nb = poly2nb(spdf)
nb2INLA("tornb.inla", nb)
tornb.inla = inla.read.graph("tornb.inla")

Model with spatial term. Determine the model that gives lowest DIC. Then compare with same model without the spatial term.

formula = nT ~ f(ID, model = "besag", graph = tornb.inla, constr = TRUE) + 
                Ldensity + Ldensity:I(Year - 1991) +
                f(Year2, model = "iid")
model = inla(formula = formula, family = "nbinomial", E = area/2000,
                 data = popNtor,
                 quantiles = c(.05, .5, .95),
                 control.compute = control$compute,
                 control.predictor = control$predictor,
                 control.results = control$results,
                 control.family = control$family
                 )
summary(model)
## 
## Call:
## c("inla(formula = formula, family = \"nbinomial\", data = popNtor, ",  "    quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ",  "    control.predictor = control$predictor, control.family = control$family, ",  "    control.results = control$results)")
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          0.2187          7.7628          0.3514          8.3329 
## 
## Fixed effects:
##                            mean     sd 0.05quant 0.5quant 0.95quant
## (Intercept)             -1.8599 0.2019   -2.1945  -1.8585   -1.5300
## Ldensity                 0.1083 0.0340    0.0525   0.1082    0.1643
## Ldensity:I(Year - 1991) -0.0016 0.0012   -0.0036  -0.0016    0.0004
##                            mode kld
## (Intercept)             -1.8557   0
## Ldensity                 0.1081   0
## Ldensity:I(Year - 1991) -0.0015   0
## 
## Random effects:
## Name   Model
##  ID   Besags ICAR model 
## Year2   IID model 
## 
## Model hyperparameters:
##                                                       mean     sd
## size for the nbinomial observations (overdispersion) 1.418 0.1871
## Precision for ID                                     5.344 2.2389
## Precision for Year2                                  1.945 0.5039
##                                                      0.05quant 0.5quant
## size for the nbinomial observations (overdispersion)     1.137    1.403
## Precision for ID                                         2.664    4.876
## Precision for Year2                                      1.225    1.889
##                                                      0.95quant  mode
## size for the nbinomial observations (overdispersion)     1.750 1.371
## Precision for ID                                         9.601 4.090
## Precision for Year2                                      2.859 1.784
## 
## Expected number of effective parameters(std dev): 70.08(6.754)
## Number of equivalent replicates : 65.50 
## 
## Deviance Information Criterion: 5210.67
## Effective number of parameters: 71.37
## 
## Marginal Likelihood:  -2750.42 
## CPO and PIT are computed
## 
## Posterior marginals for linear predictor and fitted values computed

Model diagnostics

modPIT = model$cpo$pit - runif(length(model$cpo$cpo)) * model$cpo$cpo
goftest::ad.test(modPIT)
## 
##  Anderson-Darling test of goodness-of-fit
##  Null hypothesis: uniform distribution
## 
## data:  modPIT
## An = 1.5937, p-value = 0.1556
-mean(log(model$cpo$cpo))
## [1] 0.5683614
cor.test(popNtor$nT, model$summary.fitted.values$mean, conf.level = .9)
## 
##  Pearson's product-moment correlation
## 
## data:  popNtor$nT and model$summary.fitted.values$mean
## t = 22.7776, df = 4588, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
##  0.2967520 0.3403841
## sample estimates:
##       cor 
## 0.3187369
brier.score(popNtor[["nT"]], model[["summary.fitted.values"]])
## [1] 0.4153898

Model w/out spatial term

formula0 = nT ~ Ldensity + Ldensity:I(Year - 1991) + 
                f(Year2, model = "iid")
model0 = inla(formula = formula0, family = "nbinomial", E = area/2000,
                 data = popNtor,
                 quantiles = c(.05, .5, .95),
                 control.compute = control$compute,
                 control.predictor = control$predictor,
                 control.results = control$results,
                 control.family = control$family
                 )
summary(model0)
## 
## Call:
## c("inla(formula = formula0, family = \"nbinomial\", data = popNtor, ",  "    quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ",  "    control.predictor = control$predictor, control.family = control$family, ",  "    control.results = control$results)")
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          0.1910          4.5306          0.2918          5.0134 
## 
## Fixed effects:
##                            mean     sd 0.05quant 0.5quant 0.95quant
## (Intercept)             -1.5913 0.1597   -1.8558  -1.5903   -1.3302
## Ldensity                 0.0598 0.0218    0.0239   0.0599    0.0956
## Ldensity:I(Year - 1991) -0.0014 0.0012   -0.0034  -0.0014    0.0006
##                            mode kld
## (Intercept)             -1.5883   0
## Ldensity                 0.0600   0
## Ldensity:I(Year - 1991) -0.0014   0
## 
## Random effects:
## Name   Model
##  Year2   IID model 
## 
## Model hyperparameters:
##                                                       mean     sd
## size for the nbinomial observations (overdispersion) 1.191 0.1429
## Precision for Year2                                  1.944 0.5059
##                                                      0.05quant 0.5quant
## size for the nbinomial observations (overdispersion)    0.9748    1.180
## Precision for Year2                                     1.2261    1.886
##                                                      0.95quant  mode
## size for the nbinomial observations (overdispersion)     1.444 1.156
## Precision for Year2                                      2.866 1.775
## 
## Expected number of effective parameters(std dev): 40.39(1.084)
## Number of equivalent replicates : 113.63 
## 
## Deviance Information Criterion: 5268.22
## Effective number of parameters: 41.42
## 
## Marginal Likelihood:  -2691.72 
## CPO and PIT are computed
## 
## Posterior marginals for linear predictor and fitted values computed
spdf$RandomMean = model$summary.random$ID$mean
spdf$RandomQ50 = model$summary.random$ID$'0.5quant'
spdf$sd = model$summary.random$ID$sd
df = model$summary.random$ID
names(df) = c("ID", "mean", "sd", "QL", "QM", "QH", "mode", "kld")
df$QL = (exp(df$QL) - 1) * 100
df$QM = (exp(df$QM) - 1) * 100
df$QH = (exp(df$QH) - 1) * 100
df$Sig = sign(df$QL) == sign(df$QH)
table(df$Sig)
## 
## FALSE  TRUE 
##    80    22
spdf$Sig = df$Sig
CRS0 = CRS("+proj=longlat +datum=WGS84")
bc = getBordersAndCenters(spdf, CRS0)
df2 = bc$centers
df3 = cbind(df, df2)
names(df3)[5] = 'newfield'
borders.df = merge(bc$borders, df3[c("county", "newfield", "Sig")], by = "county")
borders2.df = borders.df[borders.df$Sig, ]
p3IL = ggplotSpdata(spdf, "RandomQ50", tfun = function(x) round((exp(x) - 1) * 100)) +
        scale_fill_gradient2("Tornado Occurrence\n[% of State Avg]", 
            low = muted("blue"), mid = "white", high = muted("red"),
            midpoint = 0, space = "rgb", na.value = "grey50", guide = "colourbar") +
  geom_polygon(data = borders2.df[borders2.df$newfield > 0, ],
               aes(x = long, y = lat, group = group, fill = newfield),
               color = muted("red"), size = 2) +
  geom_polygon(data = borders2.df[borders2.df$newfield < 0, ],
               aes(x = long, y = lat, group = group, fill = newfield),
               color = muted("blue"), size = 2) +
  geom_text(data = df3[df3$Sig,], 
              aes(x = long, y = lat, group = NULL, label = round(newfield)), 
              vjust = .5, size = 3, color = 'black')
Elev.data = over(countiesun.sp, Elev1, returnList = TRUE)
Elev.df = data.frame(county = rep(county, sapply(Elev.data, nrow)),
                     Elev = unlist(Elev.data), 
                     ID = rep(spdf$ID, sapply(Elev.data, nrow)),
                     stringsAsFactors = FALSE)
t.test(Elev.df$Elev, conf.level = .9)
## 
##  One Sample t-test
## 
## data:  Elev.df$Elev
## t = 4480.956, df = 887518, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 90 percent confidence interval:
##  189.0353 189.1742
## sample estimates:
## mean of x 
##  189.1048
CE.df = Elev.df %>%
  group_by(ID) %>%
  summarize(elev = mean(Elev, na.rm = TRUE),
            elevS = sd(Elev, na.rm = TRUE),
            elevK = kurtosis(Elev, na.rm = TRUE),
            elevCV = elevS/elev)
all(spdf$ID == CE.df$ID)
## [1] TRUE
spdf@data[colnames(CE.df)] = CE.df
popNtor2 = merge(popNtor, CE.df, by = "ID")
cor.test(spdf$nT, spdf$elevS, conf.level = .9)
## 
##  Pearson's product-moment correlation
## 
## data:  spdf$nT and spdf$elevS
## t = -1.7537, df = 100, p-value = 0.08254
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
##  -0.327298205 -0.009171022
## sample estimates:
##        cor 
## -0.1727358
range(CE.df$elevS)
## [1]  5.487916 37.767288
formula = nT ~ elevS +
               f(ID, model = "besag", graph = tornb.inla, constr = TRUE) + 
               Ldensity + Ldensity:I(Year - 1991) + 
               f(Year2, model = "iid")
model = inla(formula = formula, family = "nbinomial", E = area/2000,
                 data = popNtor2,
                 quantiles = c(.05, .5, .95),
                 control.compute = control$compute,
                 control.predictor = control$predictor,
                 control.results = control$results,
                 control.family = control$family
                 )
summary(model)
## 
## Call:
## c("inla(formula = formula, family = \"nbinomial\", data = popNtor2, ",  "    quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ",  "    control.predictor = control$predictor, control.family = control$family, ",  "    control.results = control$results)")
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          0.2809          7.6899          0.3487          8.3196 
## 
## Fixed effects:
##                            mean     sd 0.05quant 0.5quant 0.95quant
## (Intercept)             -1.7623 0.2477   -2.1735  -1.7602   -1.3584
## elevS                   -0.0051 0.0075   -0.0173  -0.0052    0.0073
## Ldensity                 0.1071 0.0340    0.0512   0.1070    0.1632
## Ldensity:I(Year - 1991) -0.0016 0.0012   -0.0036  -0.0015    0.0004
##                            mode kld
## (Intercept)             -1.7558   0
## elevS                   -0.0053   0
## Ldensity                 0.1069   0
## Ldensity:I(Year - 1991) -0.0015   0
## 
## Random effects:
## Name   Model
##  ID   Besags ICAR model 
## Year2   IID model 
## 
## Model hyperparameters:
##                                                       mean     sd
## size for the nbinomial observations (overdispersion) 1.416 0.1868
## Precision for ID                                     5.447 2.3798
## Precision for Year2                                  1.944 0.5039
##                                                      0.05quant 0.5quant
## size for the nbinomial observations (overdispersion)     1.136    1.401
## Precision for ID                                         2.659    4.927
## Precision for Year2                                      1.225    1.887
##                                                      0.95quant  mode
## size for the nbinomial observations (overdispersion)     1.748 1.368
## Precision for ID                                         9.990 4.075
## Precision for Year2                                      2.859 1.782
## 
## Expected number of effective parameters(std dev): 70.57(6.88)
## Number of equivalent replicates : 65.05 
## 
## Deviance Information Criterion: 5211.97
## Effective number of parameters: 71.90
## 
## Marginal Likelihood:  -2758.53 
## CPO and PIT are computed
## 
## Posterior marginals for linear predictor and fitted values computed
p4IL = ggplotmargin(model, type = "fixed", effect = "elevS", xlab = "") +  
  xlab(expression("Elevation S.D. Coefficient"~"["*m^{-1}*"]"))
popNtor3 = merge(popNtor2, cwa.df, by = "county")
formula = nT ~ cwa +
               f(ID, model = "besag", graph = tornb.inla, constr = TRUE) + 
               Ldensity + Ldensity:I(Year - 1991) + 
               f(Year2, model = "iid")
model = inla(formula = formula, family = "nbinomial", E = area/2000,
                 data = popNtor3,
                 quantiles = c(.05, .5, .95),
                 control.compute = control$compute,
                 control.predictor = control$predictor,
                 control.results = control$results,
                 control.family = control$family
                 )
summary(model)
## 
## Call:
## c("inla(formula = formula, family = \"nbinomial\", data = popNtor3, ",  "    quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ",  "    control.predictor = control$predictor, control.family = control$family, ",  "    control.results = control$results)")
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          0.2922          8.6809          0.3151          9.2882 
## 
## Fixed effects:
##                            mean     sd 0.05quant 0.5quant 0.95quant
## (Intercept)             -1.9951 0.2567   -2.4184  -1.9946   -1.5734
## cwaILX                   0.2217 0.2028   -0.1151   0.2236    0.5519
## cwaLOT                   0.0633 0.2248   -0.3064   0.0632    0.4331
## cwaLSX                   0.0337 0.2546   -0.3898   0.0367    0.4472
## cwaPAH                   0.2969 0.3111   -0.2172   0.2983    0.8058
## Ldensity                 0.1053 0.0345    0.0486   0.1053    0.1621
## Ldensity:I(Year - 1991) -0.0016 0.0012   -0.0036  -0.0016    0.0004
##                            mode kld
## (Intercept)             -1.9935   0
## cwaILX                   0.2275   0
## cwaLOT                   0.0632   0
## cwaLSX                   0.0427   0
## cwaPAH                   0.3016   0
## Ldensity                 0.1052   0
## Ldensity:I(Year - 1991) -0.0015   0
## 
## Random effects:
## Name   Model
##  ID   Besags ICAR model 
## Year2   IID model 
## 
## Model hyperparameters:
##                                                       mean     sd
## size for the nbinomial observations (overdispersion) 1.420 0.1877
## Precision for ID                                     5.419 2.4819
## Precision for Year2                                  1.942 0.5036
##                                                      0.05quant 0.5quant
## size for the nbinomial observations (overdispersion)     1.139    1.405
## Precision for ID                                         2.559    4.860
## Precision for Year2                                      1.224    1.885
##                                                      0.95quant  mode
## size for the nbinomial observations (overdispersion)     1.754 1.372
## Precision for ID                                        10.158 3.963
## Precision for Year2                                      2.857 1.779
## 
## Expected number of effective parameters(std dev): 72.34(7.046)
## Number of equivalent replicates : 63.45 
## 
## Deviance Information Criterion: 5213.33
## Effective number of parameters: 73.69
## 
## Marginal Likelihood:  -2769.24 
## CPO and PIT are computed
## 
## Posterior marginals for linear predictor and fitted values computed

Mississippi

Borders

FP = 28
AB = "MS"
ST.sp = US.sp[US.sp$STATEFP == FP, ]
nrow(ST.sp)
## [1] 82
county = paste(ST.sp$STATEFP, ST.sp$COUNTYFP, sep = "")
countiesun.sp = geometry(spChFIDs(ST.sp, county)) 
counties.sp = spTransform(countiesun.sp, CRS(proj4string(TornL)))
rgeos::gArea(counties.sp) / 10^6
## [1] 123700.9

Elevation

Elev1 = as(crop(Elev, extent(countiesun.sp)), "SpatialGridDataFrame")
proj4string(Elev1) = proj4string(countiesun.sp) #same projection, different datum & ellipsoid
## Warning in ReplProj4string(obj, CRS(value)): A new CRS was assigned to an object with an existing CRS:
## +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0
## without reprojecting.
## For reprojection, use function spTransform in package rgdal

CWA

cwa.df = data.frame(county = CWA$V7[CWA$V7 %in% county], 
                    cwa = CWA$V3[CWA$V7 %in% county])

Population

pop0 = subset(pop0a, state_fips == FP)
pop0$pop2013 = pop0$pop2012
pop0$pop2013[1]
## [1] 2984926
pop0$county = as.character(pop0$fips)
pop1 = pop0[-1, -(1:10)]
Pop.df = melt(pop1, id.vars = "county")
Pop.df$Year = as.numeric(substring(Pop.df$variable, first = 4, last = 7))
names(Pop.df)[3:4] = c("pop", "Year")
Pop.df$lpop = log10(Pop.df$pop)
Pop.df$ID = match(Pop.df$county, county) #generate spatial ID
Pop.df = Pop.df[order(Pop.df$ID), ]
PC = Pop.df %>%
  group_by(ID) %>%
  summarize(Change = (pop[Year == max(Year)] - pop[Year == min(Year)])/pop[Year == max(Year)] * 100)
PC.df = as.data.frame(PC)
row.names(PC.df) = county
spdf = SpatialPolygonsDataFrame(counties.sp, PC.df)
spdf$Name = ST.sp$NAME
area = rgeos::gArea(counties.sp, byid = TRUE)
spdf$area = area
poplatest = subset(Pop.df, Year == max(Year))
spdf$pop = poplatest$pop
spdf$Lpop = log10(spdf$pop)
spdf$density = spdf$pop/spdf$area * 10^6
spdf$Ldensity = log10(spdf$density)

Tornadoes

ct = over(counties.sp, TornL2, returnList = TRUE)
names(ct) = county
nT = sapply(ct, function(x) nrow(x))
Nyears = diff(range(TornL2$Year)) + 1
nTor.df = data.frame(state = AB, county = county, Nyears =  Nyears,
                     nT = nT, area = area/1000000, 
                     rate = nT/area * 10^6 * 10000 / Nyears,
                     stringsAsFactors = FALSE, ID = 1:length(county))
spdf$state = AB
spdf$county = county
spdf$Nyears = nTor.df$Nyears
spdf$nT = nTor.df$nT
spdf$area = nTor.df$area
spdf$rate = nTor.df$rate
TracksAll.df = ldply(ct, data.frame)
colnames(TracksAll.df)[1] = "county"
cor.test(spdf$nT, spdf$area, conf.level = .9)
## 
##  Pearson's product-moment correlation
## 
## data:  spdf$nT and spdf$area
## t = 5.9599, df = 80, p-value = 6.475e-08
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
##  0.4134847 0.6695507
## sample estimates:
##       cor 
## 0.5545082
cor.test(spdf$nT, spdf$pop, conf.level = .9)
## 
##  Pearson's product-moment correlation
## 
## data:  spdf$nT and spdf$pop
## t = 5.0839, df = 80, p-value = 2.38e-06
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
##  0.3421089 0.6209809
## sample estimates:
##       cor 
## 0.4941524
nTor.year = TracksAll.df %>%
  filter(!duplicated(SeqID)) %>%
  group_by(Year) %>%
  summarize(nT = n())
nTor.year = as.data.frame(nTor.year)
nTor.year$Year2 = nTor.year$Year
sum(nTor.year$nT)
## [1] 1112
nTor.day0 = ddply(TracksAll.df, .(county, Year), summarize, 
                  nT = length(county), 
                  nTd = length(unique(Date)), .drop = FALSE)
nTor.day = merge(nTor.df[c("state", "county", "area", "ID")],
                 nTor.day0)
popNtor = merge(Pop.df, nTor.day, all = FALSE)
popNtor$density = with(popNtor, pop/area)
popNtor$Ldensity = log2(popNtor$density)
popNtor$Year2 = popNtor$Year

Raw counts and tracks

p4s = "+proj=lcc +lat_1=33 +lat_2=45 +lat_0=32.8 +lon_0=-89.7 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0"
spdf = spTransform(spdf, CRS(p4s))
crq = wes.palette(name = "Zissou", type = "continuous")
p1MS = plotTorn(x = spdf, tracks = TracksAll.df) +
         scale_fill_gradientn("EF1+\nTornado Reports\n(1970-2013)", 
                              colours = crq(10))

Linear Trend (%/yr)

formula = nT ~ Year
model = inla(formula = formula, 
                 family = "nbinomial", 
                 quantiles = c(.05, .5, .95), 
                 data = nTor.year,
                 control.compute = control$compute,
                 control.predictor = control$predictor)
TM = model$summary.fixed$mean[2]
TL =  model$summary.fixed$'0.05quant'[2]
TH =  model$summary.fixed$'0.95quant'[2]
(exp(TM) - 1) * 100
## [1] 0.4387827
(exp(TL) - 1) * 100
## [1] -0.7772043
(exp(TH) - 1) * 100
## [1] 1.671025

Nonlinear trend

formula = nT ~ Year + f(Year2, model = "rw2")
model = inla(formula = formula, 
                     family = "nbinomial", 
                     quantiles = c(.05, .5, .95), 
                     data = nTor.year,
                     control.compute = control$compute,
                     control.predictor = control$predictor)
out = data.frame(Year = nTor.year$Year,
                 QL = model$summary.fitted.values$"0.05quant",
                 QM = model$summary.fitted.values$"0.5quant",
                 QH = model$summary.fitted.values$"0.95quant",
                 nT = nTor.year$nT,
                 ST = AB)
p2MS = ggplot(out, aes(x = Year, y = QM)) +
  geom_line(color = 'blue') +
  geom_ribbon(aes(x = Year, ymax = QH, ymin = QL), color = "gray", alpha = .25) +
  geom_point(aes(x = Year, y = nT)) +
  ylab("Number of EF1+ Tornadoes") +
  theme_bw()

Spatial neighborhood definition as an inla graph

nb = poly2nb(spdf)
nb2INLA("tornb.inla", nb)
tornb.inla = inla.read.graph("tornb.inla")

Model with spatial term. Determine the model that gives lowest DIC. Then compare with same model without the spatial term.

formula = nT ~ f(ID, model = "besag", graph = tornb.inla, constr = TRUE) + 
                Ldensity + Ldensity:I(Year - 1991) + I(Year - 1991) +
                f(Year2, model = "iid")
model = inla(formula = formula, family = "nbinomial", E = area/2000,
                 data = popNtor,
                 quantiles = c(.05, .5, .95),
                 control.compute = control$compute,
                 control.predictor = control$predictor,
                 control.results = control$results,
                 control.family = control$family
                 )
summary(model)
## 
## Call:
## c("inla(formula = formula, family = \"nbinomial\", data = popNtor, ",  "    quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ",  "    control.predictor = control$predictor, control.family = control$family, ",  "    control.results = control$results)")
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          0.1915          5.6690          0.2765          6.1370 
## 
## Fixed effects:
##                            mean     sd 0.05quant 0.5quant 0.95quant
## (Intercept)             -1.4001 0.1767   -1.6917  -1.3996   -1.1101
## Ldensity                 0.1304 0.0345    0.0734   0.1305    0.1868
## I(Year - 1991)           0.0230 0.0116    0.0039   0.0230    0.0422
## Ldensity:I(Year - 1991) -0.0050 0.0020   -0.0083  -0.0050   -0.0018
##                            mode kld
## (Intercept)             -1.3986   0
## Ldensity                 0.1308   0
## I(Year - 1991)           0.0230   0
## Ldensity:I(Year - 1991) -0.0050   0
## 
## Random effects:
## Name   Model
##  ID   Besags ICAR model 
## Year2   IID model 
## 
## Model hyperparameters:
##                                                       mean     sd
## size for the nbinomial observations (overdispersion) 2.563 0.3815
## Precision for ID                                     9.758 4.5830
## Precision for Year2                                  2.291 0.5839
##                                                      0.05quant 0.5quant
## size for the nbinomial observations (overdispersion)     1.999    2.527
## Precision for ID                                         4.491    8.728
## Precision for Year2                                      1.454    2.227
##                                                      0.95quant  mode
## size for the nbinomial observations (overdispersion)     3.247 2.453
## Precision for ID                                        18.505 7.078
## Precision for Year2                                      3.349 2.107
## 
## Expected number of effective parameters(std dev): 65.10(5.918)
## Number of equivalent replicates : 56.68 
## 
## Deviance Information Criterion: 5680.10
## Effective number of parameters: 66.63
## 
## Marginal Likelihood:  -2979.53 
## CPO and PIT are computed
## 
## Posterior marginals for linear predictor and fitted values computed

Model diagnostics

modPIT = model$cpo$pit - runif(length(model$cpo$cpo)) * model$cpo$cpo
goftest::ad.test(modPIT)
## 
##  Anderson-Darling test of goodness-of-fit
##  Null hypothesis: uniform distribution
## 
## data:  modPIT
## An = 1.195, p-value = 0.2694
-mean(log(model$cpo$cpo))
## [1] 0.7698513
cor.test(popNtor$nT, model$summary.fitted.values$mean, conf.level = .9)
## 
##  Pearson's product-moment correlation
## 
## data:  popNtor$nT and model$summary.fitted.values$mean
## t = 27.0459, df = 3688, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
##  0.3839812 0.4291863
## sample estimates:
##       cor 
## 0.4068328
brier.score(popNtor[["nT"]], model[["summary.fitted.values"]])
## [1] 0.5644638

Model w/out spatial term

formula0 = nT ~ Ldensity + Ldensity:I(Year - 1991) + I(Year - 1991) +
                f(Year2, model = "iid")
model0 = inla(formula = formula0, family = "nbinomial", E = area/2000,
                 data = popNtor,
                 quantiles = c(.05, .5, .95),
                 control.compute = control$compute,
                 control.predictor = control$predictor,
                 control.results = control$results,
                 control.family = control$family
                 )
summary(model0)
## 
## Call:
## c("inla(formula = formula0, family = \"nbinomial\", data = popNtor, ",  "    quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ",  "    control.predictor = control$predictor, control.family = control$family, ",  "    control.results = control$results)")
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          0.1694          3.3448          0.1853          3.6995 
## 
## Fixed effects:
##                            mean     sd 0.05quant 0.5quant 0.95quant
## (Intercept)             -1.4200 0.1575   -1.6804  -1.4192   -1.1621
## Ldensity                 0.1456 0.0278    0.0999   0.1456    0.1914
## I(Year - 1991)           0.0234 0.0117    0.0041   0.0234    0.0427
## Ldensity:I(Year - 1991) -0.0051 0.0020   -0.0083  -0.0051   -0.0018
##                            mode kld
## (Intercept)             -1.4178   0
## Ldensity                 0.1456   0
## I(Year - 1991)           0.0234   0
## Ldensity:I(Year - 1991) -0.0051   0
## 
## Random effects:
## Name   Model
##  Year2   IID model 
## 
## Model hyperparameters:
##                                                       mean     sd
## size for the nbinomial observations (overdispersion) 2.171 0.2921
## Precision for Year2                                  2.284 0.5848
##                                                      0.05quant 0.5quant
## size for the nbinomial observations (overdispersion)     1.736    2.146
## Precision for Year2                                      1.451    2.217
##                                                      0.95quant  mode
## size for the nbinomial observations (overdispersion)     2.692 2.091
## Precision for Year2                                      3.348 2.091
## 
## Expected number of effective parameters(std dev): 41.67(0.8821)
## Number of equivalent replicates : 88.56 
## 
## Deviance Information Criterion: 5728.65
## Effective number of parameters: 42.71
## 
## Marginal Likelihood:  -2932.05 
## CPO and PIT are computed
## 
## Posterior marginals for linear predictor and fitted values computed
spdf$RandomMean = model$summary.random$ID$mean
spdf$RandomQ50 = model$summary.random$ID$'0.5quant'
spdf$sd = model$summary.random$ID$sd
df = model$summary.random$ID
names(df) = c("ID", "mean", "sd", "QL", "QM", "QH", "mode", "kld")
df$QL = (exp(df$QL) - 1) * 100
df$QM = (exp(df$QM) - 1) * 100
df$QH = (exp(df$QH) - 1) * 100
df$Sig = sign(df$QL) == sign(df$QH)
table(df$Sig)
## 
## FALSE  TRUE 
##    61    21
spdf$Sig = df$Sig
CRS0 = CRS("+proj=longlat +datum=WGS84")
bc = getBordersAndCenters(spdf, CRS0)
df2 = bc$centers
df3 = cbind(df, df2)
names(df3)[5] = 'newfield'
borders.df = merge(bc$borders, df3[c("county", "newfield", "Sig")], by = "county")
borders2.df = borders.df[borders.df$Sig, ]
p3MS = ggplotSpdata(spdf, "RandomQ50", tfun = function(x) round((exp(x) - 1) * 100)) +
        scale_fill_gradient2("Tornado Occurrence\n[% of State Avg]", 
            low = muted("blue"), mid = "white", high = muted("red"),
            midpoint = 0, space = "rgb", na.value = "grey50", guide = "colourbar") +
  geom_polygon(data = borders2.df[borders2.df$newfield > 0, ],
               aes(x = long, y = lat, group = group, fill = newfield),
               color = muted("red"), size = 2) +
  geom_polygon(data = borders2.df[borders2.df$newfield < 0, ],
               aes(x = long, y = lat, group = group, fill = newfield),
               color = muted("blue"), size = 2) +
  geom_text(data = df3[df3$Sig,], 
              aes(x = long, y = lat, group = NULL, label = round(newfield)), 
              vjust = .5, size = 3, color = 'black')
Elev.data = over(countiesun.sp, Elev1, returnList = TRUE)
Elev.df = data.frame(county = rep(county, sapply(Elev.data, nrow)),
                     Elev = unlist(Elev.data), 
                     ID = rep(spdf$ID, sapply(Elev.data, nrow)),
                     stringsAsFactors = FALSE)
t.test(Elev.df$Elev, conf.level = .9)
## 
##  One Sample t-test
## 
## data:  Elev.df$Elev
## t = 1759.903, df = 684694, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 90 percent confidence interval:
##  85.62105 85.78124
## sample estimates:
## mean of x 
##  85.70115
CE.df = Elev.df %>%
  group_by(ID) %>%
  summarize(elev = mean(Elev, na.rm = TRUE),
            elevS = sd(Elev, na.rm = TRUE),
            elevK = kurtosis(Elev, na.rm = TRUE),
            elevCV = elevS/elev)
all(spdf$ID == CE.df$ID)
## [1] TRUE
spdf@data[colnames(CE.df)] = CE.df
popNtor2 = merge(popNtor, CE.df, by = "ID")
cor.test(spdf$nT, spdf$elevS, conf.level = .9)
## 
##  Pearson's product-moment correlation
## 
## data:  spdf$nT and spdf$elevS
## t = -0.2054, df = 80, p-value = 0.8378
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
##  -0.2050763  0.1606901
## sample estimates:
##         cor 
## -0.02296144
range(CE.df$elevS)
## [1]  1.557093 35.021785
formula = nT ~ elevS +
               f(ID, model = "besag", graph = tornb.inla, constr = TRUE) + 
               Ldensity + Ldensity:I(Year - 1991) + I(Year - 1991) +
               f(Year2, model = "iid")
model = inla(formula = formula, family = "nbinomial", E = area/2000,
                 data = popNtor2,
                 quantiles = c(.05, .5, .95),
                 control.compute = control$compute,
                 control.predictor = control$predictor,
                 control.results = control$results,
                 control.family = control$family
                 )
summary(model)
## 
## Call:
## c("inla(formula = formula, family = \"nbinomial\", data = popNtor2, ",  "    quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ",  "    control.predictor = control$predictor, control.family = control$family, ",  "    control.results = control$results)")
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          0.2505          6.0614          0.3429          6.6548 
## 
## Fixed effects:
##                            mean     sd 0.05quant 0.5quant 0.95quant
## (Intercept)             -1.1738 0.2219   -1.5388  -1.1740   -0.8082
## elevS                   -0.0098 0.0058   -0.0194  -0.0097   -0.0003
## Ldensity                 0.1184 0.0352    0.0602   0.1186    0.1761
## I(Year - 1991)           0.0227 0.0116    0.0036   0.0227    0.0418
## Ldensity:I(Year - 1991) -0.0049 0.0020   -0.0082  -0.0049   -0.0017
##                            mode kld
## (Intercept)             -1.1743   0
## elevS                   -0.0096   0
## Ldensity                 0.1189   0
## I(Year - 1991)           0.0227   0
## Ldensity:I(Year - 1991) -0.0049   0
## 
## Random effects:
## Name   Model
##  ID   Besags ICAR model 
## Year2   IID model 
## 
## Model hyperparameters:
##                                                       mean     sd
## size for the nbinomial observations (overdispersion) 2.571 0.3831
## Precision for ID                                     9.377 4.1642
## Precision for Year2                                  2.289 0.5837
##                                                      0.05quant 0.5quant
## size for the nbinomial observations (overdispersion)     2.006    2.535
## Precision for ID                                         4.476    8.482
## Precision for Year2                                      1.454    2.225
##                                                      0.95quant  mode
## size for the nbinomial observations (overdispersion)     3.258 2.459
## Precision for ID                                        17.299 7.007
## Precision for Year2                                      3.347 2.104
## 
## Expected number of effective parameters(std dev): 66.01(5.666)
## Number of equivalent replicates : 55.90 
## 
## Deviance Information Criterion: 5678.41
## Effective number of parameters: 67.45
## 
## Marginal Likelihood:  -2986.72 
## CPO and PIT are computed
## 
## Posterior marginals for linear predictor and fitted values computed
p4MS = ggplotmargin(model, type = "fixed", effect = "elevS", xlab = "") +  
  xlab(expression("Elevation S.D. Coefficient"~"["*m^{-1}*"]"))
cwa.df$JAN = cwa.df$cwa == "JAN"
cwa.df$MOB = cwa.df$cwa == "MOB"
popNtor3 = merge(popNtor2, cwa.df, by = "county")
formula = nT ~ elevS + cwa +
               f(ID, model = "besag", graph = tornb.inla, constr = TRUE) + 
               Ldensity + Ldensity:I(Year - 1991) + I(Year - 1991) +
               f(Year2, model = "iid")
model = inla(formula = formula, family = "nbinomial", E = area/2000,
                 data = popNtor3,
                 quantiles = c(.05, .5, .95),
                 control.compute = control$compute,
                 control.predictor = control$predictor,
                 control.results = control$results,
                 control.family = control$family
                 )
summary(model)
## 
## Call:
## c("inla(formula = formula, family = \"nbinomial\", data = popNtor3, ",  "    quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ",  "    control.predictor = control$predictor, control.family = control$family, ",  "    control.results = control$results)")
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          0.3107          6.9955          0.4128          7.7189 
## 
## Fixed effects:
##                            mean     sd 0.05quant 0.5quant 0.95quant
## (Intercept)             -0.9165 0.2192   -1.2758  -0.9175   -0.5538
## elevS                   -0.0094 0.0054   -0.0184  -0.0094   -0.0008
## cwaLIX                  -0.2412 0.1450   -0.4828  -0.2393   -0.0063
## cwaMEG                  -0.2417 0.1476   -0.4745  -0.2474    0.0105
## cwaMOB                  -0.8276 0.2051   -1.1727  -0.8230   -0.4983
## Ldensity                 0.0862 0.0347    0.0286   0.0866    0.1427
## I(Year - 1991)           0.0211 0.0116    0.0020   0.0211    0.0402
## Ldensity:I(Year - 1991) -0.0045 0.0020   -0.0077  -0.0045   -0.0013
##                            mode kld
## (Intercept)             -0.9196   0
## elevS                   -0.0092   0
## cwaLIX                  -0.2353   0
## cwaMEG                  -0.2601   0
## cwaMOB                  -0.8134   0
## Ldensity                 0.0873   0
## I(Year - 1991)           0.0211   0
## Ldensity:I(Year - 1991) -0.0045   0
## 
## Random effects:
## Name   Model
##  ID   Besags ICAR model 
## Year2   IID model 
## 
## Model hyperparameters:
##                                                        mean      sd
## size for the nbinomial observations (overdispersion)  2.607  0.3893
## Precision for Year2                                   2.292  0.5844
## Precision for ID                                     20.679 13.7592
##                                                      0.05quant 0.5quant
## size for the nbinomial observations (overdispersion)     2.031    2.571
## Precision for Year2                                      1.455    2.228
## Precision for ID                                         7.343   16.918
##                                                      0.95quant   mode
## size for the nbinomial observations (overdispersion)     3.304  2.497
## Precision for Year2                                      3.351  2.107
## Precision for ID                                        46.528 12.014
## 
## Expected number of effective parameters(std dev): 60.44(5.492)
## Number of equivalent replicates : 61.05 
## 
## Deviance Information Criterion: 5669.29
## Effective number of parameters: 62.10
## 
## Marginal Likelihood:  -2992.35 
## CPO and PIT are computed
## 
## Posterior marginals for linear predictor and fitted values computed
formula = nT ~ elevS + JAN +
               f(ID, model = "besag", graph = tornb.inla, constr = TRUE) + 
               Ldensity + Ldensity:I(Year - 1991) + I(Year - 1991) +
               f(Year2, model = "iid")
model = inla(formula = formula, family = "nbinomial", E = area/2000,
                 data = popNtor3,
                 quantiles = c(.05, .5, .95),
                 control.compute = control$compute,
                 control.predictor = control$predictor,
                 control.results = control$results,
                 control.family = control$family
                 )
summary(model)
## 
## Call:
## c("inla(formula = formula, family = \"nbinomial\", data = popNtor3, ",  "    quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ",  "    control.predictor = control$predictor, control.family = control$family, ",  "    control.results = control$results)")
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          0.2622          5.5908          0.7361          6.5890 
## 
## Fixed effects:
##                            mean     sd 0.05quant 0.5quant 0.95quant
## (Intercept)             -1.3788 0.2167   -1.7333  -1.3802   -1.0195
## elevS                   -0.0086 0.0053   -0.0174  -0.0085   -0.0001
## JANTRUE                  0.3412 0.0928    0.1870   0.3422    0.4918
## Ldensity                 0.1159 0.0329    0.0610   0.1162    0.1694
## I(Year - 1991)           0.0220 0.0116    0.0029   0.0220    0.0411
## Ldensity:I(Year - 1991) -0.0048 0.0020   -0.0080  -0.0048   -0.0015
##                            mode kld
## (Intercept)             -1.3831   0
## elevS                   -0.0083   0
## JANTRUE                  0.3442   0
## Ldensity                 0.1170   0
## I(Year - 1991)           0.0220   0
## Ldensity:I(Year - 1991) -0.0048   0
## 
## Random effects:
## Name   Model
##  ID   Besags ICAR model 
## Year2   IID model 
## 
## Model hyperparameters:
##                                                        mean      sd
## size for the nbinomial observations (overdispersion)  2.559  0.3801
## Precision for ID                                     28.099 24.3386
## Precision for Year2                                   2.294  0.5849
##                                                      0.05quant 0.5quant
## size for the nbinomial observations (overdispersion)     1.998    2.524
## Precision for ID                                         7.867   20.914
## Precision for Year2                                      1.456    2.229
##                                                      0.95quant   mode
## size for the nbinomial observations (overdispersion)     3.240  2.449
## Precision for ID                                        71.860 13.144
## Precision for Year2                                      3.354  2.109
## 
## Expected number of effective parameters(std dev): 57.84(6.064)
## Number of equivalent replicates : 63.80 
## 
## Deviance Information Criterion: 5675.87
## Effective number of parameters: 59.67
## 
## Marginal Likelihood:  -2986.41 
## CPO and PIT are computed
## 
## Posterior marginals for linear predictor and fitted values computed
formula = nT ~ elevS + MOB +
               f(ID, model = "besag", graph = tornb.inla, constr = TRUE) + 
               Ldensity + Ldensity:I(Year - 1991) + I(Year - 1991) +
               f(Year2, model = "iid")
model = inla(formula = formula, family = "nbinomial", E = area/2000,
                 data = popNtor3,
                 quantiles = c(.05, .5, .95),
                 control.compute = control$compute,
                 control.predictor = control$predictor,
                 control.results = control$results,
                 control.family = control$family
                 )
summary(model)
## 
## Call:
## c("inla(formula = formula, family = \"nbinomial\", data = popNtor3, ",  "    quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ",  "    control.predictor = control$predictor, control.family = control$family, ",  "    control.results = control$results)")
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          0.2611          8.0009          0.2929          8.5549 
## 
## Fixed effects:
##                            mean     sd 0.05quant 0.5quant 0.95quant
## (Intercept)             -0.9762 0.2211   -1.3398  -0.9764   -0.6118
## elevS                   -0.0100 0.0055   -0.0191  -0.0100   -0.0010
## MOBTRUE                 -0.7629 0.2053   -1.1054  -0.7600   -0.4303
## Ldensity                 0.0806 0.0353    0.0221   0.0807    0.1385
## I(Year - 1991)           0.0214 0.0116    0.0022   0.0214    0.0405
## Ldensity:I(Year - 1991) -0.0046 0.0020   -0.0078  -0.0046   -0.0013
##                            mode kld
## (Intercept)             -0.9767   0
## elevS                   -0.0099   0
## MOBTRUE                 -0.7541   0
## Ldensity                 0.0811   0
## I(Year - 1991)           0.0214   0
## Ldensity:I(Year - 1991) -0.0046   0
## 
## Random effects:
## Name   Model
##  ID   Besags ICAR model 
## Year2   IID model 
## 
## Model hyperparameters:
##                                                        mean     sd
## size for the nbinomial observations (overdispersion)  2.603 0.3893
## Precision for ID                                     12.459 5.6122
## Precision for Year2                                   2.290 0.5839
##                                                      0.05quant 0.5quant
## size for the nbinomial observations (overdispersion)     2.031    2.566
## Precision for ID                                         5.806   11.271
## Precision for Year2                                      1.454    2.226
##                                                      0.95quant  mode
## size for the nbinomial observations (overdispersion)     3.302 2.486
## Precision for ID                                        23.132 9.302
## Precision for Year2                                      3.349 2.105
## 
## Expected number of effective parameters(std dev): 62.90(5.097)
## Number of equivalent replicates : 58.66 
## 
## Deviance Information Criterion: 5668.38
## Effective number of parameters: 64.21
## 
## Marginal Likelihood:  -2984.54 
## CPO and PIT are computed
## 
## Posterior marginals for linear predictor and fitted values computed

South Dakota

Borders

FP = 46
AB = "SD"
ST.sp = US.sp[US.sp$STATEFP == FP, ]
nrow(ST.sp)
## [1] 66
county = paste(ST.sp$STATEFP, ST.sp$COUNTYFP, sep = "")
countiesun.sp = geometry(spChFIDs(ST.sp, county)) 
counties.sp = spTransform(countiesun.sp, CRS(proj4string(TornL)))
rgeos::gArea(counties.sp) / 10^6
## [1] 199366.8

Elevation

ElevA = Elev
unzip("15-B.zip",exdir="./tmp")
ElevB = raster("./tmp/15-B.tif")
Elev = merge(ElevA, ElevB)
Elev1 = as(crop(Elev, extent(countiesun.sp)), "SpatialGridDataFrame")
proj4string(Elev1) = proj4string(countiesun.sp) #same projection, different datum & ellipsoid
## Warning in ReplProj4string(obj, CRS(value)): A new CRS was assigned to an object with an existing CRS:
## +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0
## without reprojecting.
## For reprojection, use function spTransform in package rgdal

CWA

cwa.df = data.frame(county = CWA$V7[CWA$V7 %in% county], 
                    cwa = CWA$V3[CWA$V7 %in% county])

Population

pop0 = subset(pop0a, state_fips == FP)
pop0$pop2013 = pop0$pop2012
pop0$pop2013[1]
## [1] 833354
pop0$county = as.character(pop0$fips)
pop1 = pop0[-1, -(1:10)]
Pop.df = melt(pop1, id.vars = "county")
Pop.df$Year = as.numeric(substring(Pop.df$variable, first = 4, last = 7))
names(Pop.df)[3:4] = c("pop", "Year")
Pop.df$lpop = log10(Pop.df$pop)
Pop.df$ID = match(Pop.df$county, county) #generate spatial ID
Pop.df = Pop.df[order(Pop.df$ID), ]
PC = Pop.df %>%
  group_by(ID) %>%
  summarize(Change = (pop[Year == max(Year)] - pop[Year == min(Year)])/pop[Year == max(Year)] * 100)
PC.df = as.data.frame(PC)
row.names(PC.df) = county
spdf = SpatialPolygonsDataFrame(counties.sp, PC.df)
spdf$Name = ST.sp$NAME
area = rgeos::gArea(counties.sp, byid = TRUE)
spdf$area = area
poplatest = subset(Pop.df, Year == max(Year))
spdf$pop = poplatest$pop
spdf$Lpop = log10(spdf$pop)
spdf$density = spdf$pop/spdf$area * 10^6
spdf$Ldensity = log10(spdf$density)

Tornadoes

ct = over(counties.sp, TornL2, returnList = TRUE)
names(ct) = county
nT = sapply(ct, function(x) nrow(x))
Nyears = diff(range(TornL2$Year)) + 1
nTor.df = data.frame(state = AB, county = county, Nyears =  Nyears,
                     nT = nT, area = area/1000000, 
                     rate = nT/area * 10^6 * 10000 / Nyears,
                     stringsAsFactors = FALSE, ID = 1:length(county))
spdf$state = AB
spdf$county = county
spdf$Nyears = nTor.df$Nyears
spdf$nT = nTor.df$nT
spdf$area = nTor.df$area
spdf$rate = nTor.df$rate
TracksAll.df = ldply(ct, data.frame)
colnames(TracksAll.df)[1] = "county"
cor.test(spdf$nT, spdf$area, conf.level = .9)
## 
##  Pearson's product-moment correlation
## 
## data:  spdf$nT and spdf$area
## t = -0.8577, df = 64, p-value = 0.3943
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
##  -0.3042850  0.0998948
## sample estimates:
##        cor 
## -0.1065965
cor.test(spdf$nT, spdf$pop, conf.level = .9)
## 
##  Pearson's product-moment correlation
## 
## data:  spdf$nT and spdf$pop
## t = 3.368, df = 64, p-value = 0.001286
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
##  0.1995193 0.5488235
## sample estimates:
##       cor 
## 0.3880175
nTor.year = TracksAll.df %>%
  filter(!duplicated(SeqID)) %>%
  group_by(Year) %>%
  summarize(nT = n())
nTor.year = as.data.frame(nTor.year)
nTor.year$Year2 = nTor.year$Year
sum(nTor.year$nT)
## [1] 423
nTor.day0 = ddply(TracksAll.df, .(county, Year), summarize, 
                  nT = length(county), 
                  nTd = length(unique(Date)), .drop = FALSE)
nTor.day = merge(nTor.df[c("state", "county", "area", "ID")],
                 nTor.day0)
popNtor = merge(Pop.df, nTor.day, all = FALSE)
popNtor$density = with(popNtor, pop/area)
popNtor$Ldensity = log2(popNtor$density)
popNtor$Year2 = popNtor$Year

Raw counts and tracks

p4s = "+proj=lcc +lat_1=33 +lat_2=45 +lat_0=44.43 +lon_0=-100.2 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0"
spdf = spTransform(spdf, CRS(p4s))
crq = wes.palette(name = "Zissou", type = "continuous")
p1SD = plotTorn(x = spdf, tracks = TracksAll.df) +
         scale_fill_gradientn("EF1+\nTornado Reports\n(1970-2013)", 
                              colours = crq(10))

Linear Trend (%/yr)

formula = nT ~ Year
model = inla(formula = formula, 
                 family = "nbinomial", 
                 quantiles = c(.05, .5, .95), 
                 data = nTor.year,
                 control.compute = control$compute,
                 control.predictor = control$predictor)
TM = model$summary.fixed$mean[2]
TL =  model$summary.fixed$'0.05quant'[2]
TH =  model$summary.fixed$'0.95quant'[2]
(exp(TM) - 1) * 100
## [1] -1.596424
(exp(TL) - 1) * 100
## [1] -3.044101
(exp(TH) - 1) * 100
## [1] -0.1358805

Nonlinear trend

formula = nT ~ Year + f(Year2, model = "rw2")
model = inla(formula = formula, 
                     family = "nbinomial", 
                     quantiles = c(.05, .5, .95), 
                     data = nTor.year,
                     control.compute = control$compute,
                     control.predictor = control$predictor)
out = data.frame(Year = nTor.year$Year,
                 QL = model$summary.fitted.values$"0.05quant",
                 QM = model$summary.fitted.values$"0.5quant",
                 QH = model$summary.fitted.values$"0.95quant",
                 nT = nTor.year$nT,
                 ST = AB)
p2SD = ggplot(out, aes(x = Year, y = QM)) +
  geom_line(color = 'blue') +
  geom_ribbon(aes(x = Year, ymax = QH, ymin = QL), color = "gray", alpha = .25) +
  geom_point(aes(x = Year, y = nT)) +
  ylab("Number of EF1+ Tornadoes") +
  theme_bw()

Spatial neighborhood definition as an inla graph

nb = poly2nb(spdf)
nb2INLA("tornb.inla", nb)
tornb.inla = inla.read.graph("tornb.inla")

Model with spatial term. Determine the model that gives lowest DIC. Then compare with same model without the spatial term.

formula = nT ~ f(ID, model = "besag", graph = tornb.inla, constr = TRUE) + 
                Ldensity + I(Year - 1991) +
                f(Year2, model = "iid")
model = inla(formula = formula, family = "nbinomial", E = area/2000,
                 data = popNtor,
                 quantiles = c(.05, .5, .95),
                 control.compute = control$compute,
                 control.predictor = control$predictor,
                 control.results = control$results,
                 control.family = control$family
                 )
summary(model)
## 
## Call:
## c("inla(formula = formula, family = \"nbinomial\", data = popNtor, ",  "    quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ",  "    control.predictor = control$predictor, control.family = control$family, ",  "    control.results = control$results)")
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          0.1949          4.5249          0.1829          4.9027 
## 
## Fixed effects:
##                   mean     sd 0.05quant 0.5quant 0.95quant    mode kld
## (Intercept)    -2.5530 0.1388   -2.7845  -2.5510   -2.3282 -2.5471   0
## Ldensity        0.1693 0.0541    0.0791   0.1700    0.2569  0.1716   0
## I(Year - 1991) -0.0173 0.0088   -0.0318  -0.0173   -0.0029 -0.0172   0
## 
## Random effects:
## Name   Model
##  ID   Besags ICAR model 
## Year2   IID model 
## 
## Model hyperparameters:
##                                                        mean     sd
## size for the nbinomial observations (overdispersion) 0.3746 0.0620
## Precision for ID                                     4.6295 2.1785
## Precision for Year2                                  2.8847 0.9629
##                                                      0.05quant 0.5quant
## size for the nbinomial observations (overdispersion)    0.2833   0.3687
## Precision for ID                                        2.0681   4.1620
## Precision for Year2                                     1.6052   2.7342
##                                                      0.95quant   mode
## size for the nbinomial observations (overdispersion)    0.4859 0.3566
## Precision for ID                                        8.7786 3.3931
## Precision for Year2                                     4.6734 2.4571
## 
## Expected number of effective parameters(std dev): 46.21(4.871)
## Number of equivalent replicates : 60.49 
## 
## Deviance Information Criterion: 2500.12
## Effective number of parameters: 47.39
## 
## Marginal Likelihood:  -1343.84 
## CPO and PIT are computed
## 
## Posterior marginals for linear predictor and fitted values computed

Model diagnostics

modPIT = model$cpo$pit - runif(length(model$cpo$cpo)) * model$cpo$cpo
goftest::ad.test(modPIT)
## 
##  Anderson-Darling test of goodness-of-fit
##  Null hypothesis: uniform distribution
## 
## data:  modPIT
## An = 5.8315, p-value = 0.001158
-mean(log(model$cpo$cpo))
## [1] 0.4477812
cor.test(popNtor$nT, model$summary.fitted.values$mean, conf.level = .9)
## 
##  Pearson's product-moment correlation
## 
## data:  popNtor$nT and model$summary.fitted.values$mean
## t = 14.7524, df = 2793, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
##  0.2397515 0.2974949
## sample estimates:
##       cor 
## 0.2688647
brier.score(popNtor[["nT"]], model[["summary.fitted.values"]])
## [1] 0.2853985

Model w/out spatial term

formula0 = nT ~ Ldensity + I(Year - 1991) +
                f(Year2, model = "iid")
model0 = inla(formula = formula0, family = "nbinomial", E = area/2000,
                 data = popNtor,
                 quantiles = c(.05, .5, .95),
                 control.compute = control$compute,
                 control.predictor = control$predictor,
                 control.results = control$results,
                 control.family = control$family
                 )
summary(model0)
## 
## Call:
## c("inla(formula = formula0, family = \"nbinomial\", data = popNtor, ",  "    quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ",  "    control.predictor = control$predictor, control.family = control$family, ",  "    control.results = control$results)")
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          0.1401          2.5936          0.1900          2.9237 
## 
## Fixed effects:
##                   mean     sd 0.05quant 0.5quant 0.95quant    mode kld
## (Intercept)    -2.7862 0.1312   -3.0062  -2.7837   -2.5751 -2.7785   0
## Ldensity        0.3135 0.0376    0.2520   0.3133    0.3759  0.3128   0
## I(Year - 1991) -0.0167 0.0091   -0.0317  -0.0167   -0.0019 -0.0166   0
## 
## Random effects:
## Name   Model
##  Year2   IID model 
## 
## Model hyperparameters:
##                                                        mean     sd
## size for the nbinomial observations (overdispersion) 0.2917 0.0433
## Precision for Year2                                  2.6839 0.8814
##                                                      0.05quant 0.5quant
## size for the nbinomial observations (overdispersion)     0.227    0.288
## Precision for Year2                                      1.506    2.549
##                                                      0.95quant   mode
## size for the nbinomial observations (overdispersion)    0.3688 0.2804
## Precision for Year2                                     4.3203 2.2995
## 
## Expected number of effective parameters(std dev): 31.17(2.41)
## Number of equivalent replicates : 89.68 
## 
## Deviance Information Criterion: 2543.55
## Effective number of parameters: 32.31
## 
## Marginal Likelihood:  -1308.76 
## CPO and PIT are computed
## 
## Posterior marginals for linear predictor and fitted values computed
spdf$RandomMean = model$summary.random$ID$mean
spdf$RandomQ50 = model$summary.random$ID$'0.5quant'
spdf$sd = model$summary.random$ID$sd
df = model$summary.random$ID
names(df) = c("ID", "mean", "sd", "QL", "QM", "QH", "mode", "kld")
df$QL = (exp(df$QL) - 1) * 100
df$QM = (exp(df$QM) - 1) * 100
df$QH = (exp(df$QH) - 1) * 100
df$Sig = sign(df$QL) == sign(df$QH)
table(df$Sig)
## 
## FALSE  TRUE 
##    39    27
spdf$Sig = df$Sig
CRS0 = CRS("+proj=longlat +datum=WGS84")
bc = getBordersAndCenters(spdf, CRS0)
df2 = bc$centers
df3 = cbind(df, df2)
names(df3)[5] = 'newfield'
borders.df = merge(bc$borders, df3[c("county", "newfield", "Sig")], by = "county")
borders2.df = borders.df[borders.df$Sig, ]
p3SD = ggplotSpdata(spdf, "RandomQ50", tfun = function(x) round((exp(x) - 1) * 100)) +
        scale_fill_gradient2("Tornado Occurrence\n[% of State Avg]", 
            low = muted("blue"), mid = "white", high = muted("red"),
            midpoint = 0, space = "rgb", na.value = "grey50", guide = "colourbar") +
  geom_polygon(data = borders2.df[borders2.df$newfield > 0, ],
               aes(x = long, y = lat, group = group, fill = newfield),
               color = muted("red"), size = 2) +
  geom_polygon(data = borders2.df[borders2.df$newfield < 0, ],
               aes(x = long, y = lat, group = group, fill = newfield),
               color = muted("blue"), size = 2) +
  geom_text(data = df3[df3$Sig,], 
              aes(x = long, y = lat, group = NULL, label = round(newfield)), 
              vjust = .5, size = 3, color = 'black')
Elev.data = over(countiesun.sp, Elev1, returnList = TRUE)
Elev.df = data.frame(county = rep(county, sapply(Elev.data, nrow)),
                     Elev = unlist(Elev.data), 
                     ID = rep(spdf$ID, sapply(Elev.data, nrow)),
                     stringsAsFactors = FALSE)
t.test(Elev.df$Elev, conf.level = .9)
## 
##  One Sample t-test
## 
## data:  Elev.df$Elev
## t = 2928.796, df = 1300438, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 90 percent confidence interval:
##  664.6101 665.3570
## sample estimates:
## mean of x 
##  664.9836
CE.df = Elev.df %>%
  group_by(ID) %>%
  summarize(elev = mean(Elev, na.rm = TRUE),
            elevS = sd(Elev, na.rm = TRUE),
            elevK = kurtosis(Elev, na.rm = TRUE),
            elevCV = elevS/elev)
all(spdf$ID == CE.df$ID)
## [1] TRUE
spdf@data[colnames(CE.df)] = CE.df
popNtor2 = merge(popNtor, CE.df, by = "ID")
cor.test(spdf$nT, spdf$elevS, conf.level = .9)
## 
##  Pearson's product-moment correlation
## 
## data:  spdf$nT and spdf$elevS
## t = -0.9259, df = 64, p-value = 0.358
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
##  -0.31195803  0.09149449
## sample estimates:
##        cor 
## -0.1149704
range(spdf$elevS)
## [1]   6.386208 420.508863
formula = nT ~ elevS +
               f(ID, model = "besag", graph = tornb.inla, constr = TRUE) + 
               Ldensity + I(Year - 1991) +
               f(Year2, model = "iid")
model = inla(formula = formula, family = "nbinomial", E = area/2000,
                 data = popNtor2,
                 quantiles = c(.05, .5, .95),
                 control.compute = control$compute,
                 control.predictor = control$predictor,
                 control.results = control$results,
                 control.family = control$family
                 )
summary(model)
## 
## Call:
## c("inla(formula = formula, family = \"nbinomial\", data = popNtor2, ",  "    quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ",  "    control.predictor = control$predictor, control.family = control$family, ",  "    control.results = control$results)")
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          0.2102          5.1065          0.8625          6.1793 
## 
## Fixed effects:
##                   mean     sd 0.05quant 0.5quant 0.95quant    mode kld
## (Intercept)    -2.4928 0.1432   -2.7312  -2.4911   -2.2604 -2.4876   0
## elevS          -0.0020 0.0012   -0.0039  -0.0020    0.0000 -0.0020   0
## Ldensity        0.2142 0.0604    0.1130   0.2153    0.3114  0.2178   0
## I(Year - 1991) -0.0171 0.0088   -0.0316  -0.0170   -0.0027 -0.0170   0
## 
## Random effects:
## Name   Model
##  ID   Besags ICAR model 
## Year2   IID model 
## 
## Model hyperparameters:
##                                                        mean     sd
## size for the nbinomial observations (overdispersion) 0.3694 0.0611
## Precision for ID                                     8.2729 6.3286
## Precision for Year2                                  2.8836 0.9635
##                                                      0.05quant 0.5quant
## size for the nbinomial observations (overdispersion)    0.2792   0.3637
## Precision for ID                                        2.5953   6.4608
## Precision for Year2                                     1.6039   2.7326
##                                                      0.95quant   mode
## size for the nbinomial observations (overdispersion)    0.4786 0.3523
## Precision for ID                                       19.9173 4.3100
## Precision for Year2                                     4.6735 2.4547
## 
## Expected number of effective parameters(std dev): 43.94(5.391)
## Number of equivalent replicates : 63.61 
## 
## Deviance Information Criterion: 2502.50
## Effective number of parameters: 45.63
## 
## Marginal Likelihood:  -1352.57 
## CPO and PIT are computed
## 
## Posterior marginals for linear predictor and fitted values computed
p4SD = ggplotmargin(model, type = "fixed", effect = "elevS", xlab = "") +  
  xlab(expression("Elevation S.D. Coefficient"~"["*m^{-1}*"]"))
table(cwa.df$cwa)
## 
## ABR FSD UNR 
##  26  24  25
cwa.df$ABR = cwa.df$cwa == "ABR"
cwa.df$FSD = cwa.df$cwa == "FSD"
cwa.df$UNR = cwa.df$cwa == "UNR"
popNtor3 = merge(popNtor2, cwa.df, by = "county")
formula = nT ~ elevS + cwa +
               f(ID, model = "besag", graph = tornb.inla, constr = TRUE) + 
               Ldensity + I(Year - 1991) + 
               f(Year2, model = "iid")
model = inla(formula = formula, family = "nbinomial", E = area/2000,
                 data = popNtor3,
                 quantiles = c(.05, .5, .95),
                 control.compute = control$compute,
                 control.predictor = control$predictor,
                 control.results = control$results,
                 control.family = control$family
                 )
summary(model)
## 
## Call:
## c("inla(formula = formula, family = \"nbinomial\", data = popNtor3, ",  "    quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ",  "    control.predictor = control$predictor, control.family = control$family, ",  "    control.results = control$results)")
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          0.2442          6.1058          0.2709          6.6208 
## 
## Fixed effects:
##                   mean     sd 0.05quant 0.5quant 0.95quant    mode kld
## (Intercept)    -2.5747 0.1459   -2.8175  -2.5729   -2.3378 -2.5695   0
## elevS          -0.0022 0.0007   -0.0034  -0.0022   -0.0010 -0.0022   0
## cwaFSD          0.4265 0.1547    0.1717   0.4266    0.6812  0.4267   0
## cwaUNR         -0.3241 0.1538   -0.5777  -0.3240   -0.0710 -0.3236   0
## Ldensity        0.2427 0.0460    0.1674   0.2424    0.3189  0.2419   0
## I(Year - 1991) -0.0177 0.0087   -0.0320  -0.0177   -0.0035 -0.0177   0
## 
## Random effects:
## Name   Model
##  ID   Besags ICAR model 
## Year2   IID model 
## 
## Model hyperparameters:
##                                                           mean        sd
## size for the nbinomial observations (overdispersion) 3.357e-01 5.060e-02
## Precision for ID                                     1.838e+04 1.826e+04
## Precision for Year2                                  2.795e+00 8.954e-01
##                                                      0.05quant  0.5quant
## size for the nbinomial observations (overdispersion)    0.2605 3.313e-01
## Precision for ID                                     1846.3234 1.299e+04
## Precision for Year2                                     1.5906 2.661e+00
##                                                      0.95quant      mode
## size for the nbinomial observations (overdispersion) 4.259e-01    0.3223
## Precision for ID                                     5.293e+04 3370.8045
## Precision for Year2                                  4.453e+00    2.4131
## 
## Expected number of effective parameters(std dev): 35.30(2.176)
## Number of equivalent replicates : 90.14 
## 
## Deviance Information Criterion: 2881.48
## Effective number of parameters: 36.32
## 
## Marginal Likelihood:  -1544.17 
## CPO and PIT are computed
## 
## Posterior marginals for linear predictor and fitted values computed
formula = nT ~ elevS + UNR +
               f(ID, model = "besag", graph = tornb.inla, constr = TRUE) + 
               Ldensity + I(Year - 1991) + 
               f(Year2, model = "iid")
model = inla(formula = formula, family = "nbinomial", E = area/2000,
                 data = popNtor3,
                 quantiles = c(.05, .5, .95),
                 control.compute = control$compute,
                 control.predictor = control$predictor,
                 control.results = control$results,
                 control.family = control$family
                 )
summary(model)
## 
## Call:
## c("inla(formula = formula, family = \"nbinomial\", data = popNtor3, ",  "    quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ",  "    control.predictor = control$predictor, control.family = control$family, ",  "    control.results = control$results)")
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          0.2360          5.8148          0.2693          6.3201 
## 
## Fixed effects:
##                   mean     sd 0.05quant 0.5quant 0.95quant    mode kld
## (Intercept)    -2.4406 0.1515   -2.6931  -2.4387   -2.1949 -2.4348   0
## elevS          -0.0020 0.0010   -0.0036  -0.0020   -0.0003 -0.0021   0
## UNRTRUE        -0.2104 0.2617   -0.6145  -0.2251    0.2434 -0.2593   0
## Ldensity        0.2229 0.0605    0.1209   0.2245    0.3197  0.2282   0
## I(Year - 1991) -0.0178 0.0087   -0.0321  -0.0178   -0.0035 -0.0177   0
## 
## Random effects:
## Name   Model
##  ID   Besags ICAR model 
## Year2   IID model 
## 
## Model hyperparameters:
##                                                         mean      sd
## size for the nbinomial observations (overdispersion)  0.3523  0.0546
## Precision for ID                                     11.5017 13.7402
## Precision for Year2                                   2.7466  0.8737
##                                                      0.05quant 0.5quant
## size for the nbinomial observations (overdispersion)    0.2713   0.3475
## Precision for ID                                        2.4843   7.3660
## Precision for Year2                                     1.5685   2.6172
##                                                      0.95quant   mode
## size for the nbinomial observations (overdispersion)    0.4497 0.3376
## Precision for ID                                       33.7109 3.9967
## Precision for Year2                                     4.3643 2.3770
## 
## Expected number of effective parameters(std dev): 46.48(6.237)
## Number of equivalent replicates : 68.46 
## 
## Deviance Information Criterion: 2874.00
## Effective number of parameters: 48.59
## 
## Marginal Likelihood:  -1545.18 
## CPO and PIT are computed
## 
## Posterior marginals for linear predictor and fitted values computed

Ohio

Borders

FP = 39
AB = "OH"
ST.sp = US.sp[US.sp$STATEFP == FP, ]
nrow(ST.sp)
## [1] 88
county = paste(ST.sp$STATEFP, ST.sp$COUNTYFP, sep = "")
countiesun.sp = geometry(spChFIDs(ST.sp, county)) 
counties.sp = spTransform(countiesun.sp, CRS(proj4string(TornL)))
rgeos::gArea(counties.sp) / 10^6
## [1] 105954

Elevation

Elev1 = as(crop(Elev, extent(countiesun.sp)), "SpatialGridDataFrame")
proj4string(Elev1) = proj4string(countiesun.sp) #same projection, different datum & ellipsoid
## Warning in ReplProj4string(obj, CRS(value)): A new CRS was assigned to an object with an existing CRS:
## +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0
## without reprojecting.
## For reprojection, use function spTransform in package rgdal

CWA

cwa.df = data.frame(county = CWA$V7[CWA$V7 %in% county], 
                    cwa = CWA$V3[CWA$V7 %in% county])

Population

pop0 = subset(pop0a, state_fips == FP)
pop0$pop2013 = pop0$pop2012
pop0$pop2013[1]
## [1] 11544225
pop0$county = as.character(pop0$fips)
pop1 = pop0[-1, -(1:10)]
Pop.df = melt(pop1, id.vars = "county")
Pop.df$Year = as.numeric(substring(Pop.df$variable, first = 4, last = 7))
names(Pop.df)[3:4] = c("pop", "Year")
Pop.df$lpop = log10(Pop.df$pop)
Pop.df$ID = match(Pop.df$county, county) #generate spatial ID
Pop.df = Pop.df[order(Pop.df$ID), ]
PC = Pop.df %>%
  group_by(ID) %>%
  summarize(Change = (pop[Year == max(Year)] - pop[Year == min(Year)])/pop[Year == max(Year)] * 100)
PC.df = as.data.frame(PC)
row.names(PC.df) = county
spdf = SpatialPolygonsDataFrame(counties.sp, PC.df)
spdf$Name = ST.sp$NAME
area = rgeos::gArea(counties.sp, byid = TRUE)
spdf$area = area
poplatest = subset(Pop.df, Year == max(Year))
spdf$pop = poplatest$pop
spdf$Lpop = log10(spdf$pop)
spdf$density = spdf$pop/spdf$area * 10^6
spdf$Ldensity = log10(spdf$density)

Tornadoes

ct = over(counties.sp, TornL2, returnList = TRUE)
names(ct) = county
nT = sapply(ct, function(x) nrow(x))
Nyears = diff(range(TornL2$Year)) + 1
nTor.df = data.frame(state = AB, county = county, Nyears =  Nyears,
                     nT = nT, area = area/1000000, 
                     rate = nT/area * 10^6 * 10000 / Nyears,
                     stringsAsFactors = FALSE, ID = 1:length(county))
spdf$state = AB
spdf$county = county
spdf$Nyears = nTor.df$Nyears
spdf$nT = nTor.df$nT
spdf$area = nTor.df$area
spdf$rate = nTor.df$rate
TracksAll.df = ldply(ct, data.frame)
colnames(TracksAll.df)[1] = "county"
cor.test(spdf$nT, spdf$area, conf.level = .9)
## 
##  Pearson's product-moment correlation
## 
## data:  spdf$nT and spdf$area
## t = 3.3391, df = 86, p-value = 0.001244
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
##  0.1725512 0.4862321
## sample estimates:
##       cor 
## 0.3387718
cor.test(spdf$nT, spdf$pop, conf.level = .9)
## 
##  Pearson's product-moment correlation
## 
## data:  spdf$nT and spdf$pop
## t = 1.9243, df = 86, p-value = 0.05762
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
##  0.02762961 0.36657054
## sample estimates:
##       cor 
## 0.2031789
nTor.year = TracksAll.df %>%
  filter(!duplicated(SeqID)) %>%
  group_by(Year) %>%
  summarize(nT = n())
nTor.year = as.data.frame(nTor.year)
nTor.year$Year2 = nTor.year$Year
sum(nTor.year$nT)
## [1] 501
nTor.day0 = ddply(TracksAll.df, .(county, Year), summarize, 
                  nT = length(county), 
                  nTd = length(unique(Date)), .drop = FALSE)
nTor.day = merge(nTor.df[c("state", "county", "area", "ID")],
                 nTor.day0)
popNtor = merge(Pop.df, nTor.day, all = FALSE)
popNtor$density = with(popNtor, pop/area)
popNtor$Ldensity = log2(popNtor$density)
popNtor$Year2 = popNtor$Year

Raw counts and tracks

p4s = "+proj=lcc +lat_1=33 +lat_2=45 +lat_0=40.28 +lon_0=-82.79 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0"
spdf = spTransform(spdf, CRS(p4s))
crq = wes.palette(name = "Zissou", type = "continuous")
p1OH = plotTorn(x = spdf, tracks = TracksAll.df) +
         scale_fill_gradientn("EF1+\nTornado Reports\n(1970-2013)", 
                              colours = crq(10))

Linear Trend (%/yr)

formula = nT ~ Year
model = inla(formula = formula, 
                 family = "nbinomial", 
                 quantiles = c(.05, .5, .95), 
                 data = nTor.year,
                 control.compute = control$compute,
                 control.predictor = control$predictor)
TM = model$summary.fixed$mean[2]
TL =  model$summary.fixed$'0.05quant'[2]
TH =  model$summary.fixed$'0.95quant'[2]
(exp(TM) - 1) * 100
## [1] -1.447458
(exp(TL) - 1) * 100
## [1] -2.846083
(exp(TH) - 1) * 100
## [1] -0.03067495

Nonlinear trend

formula = nT ~ Year + f(Year2, model = "rw2")
model = inla(formula = formula, 
                     family = "nbinomial", 
                     quantiles = c(.05, .5, .95), 
                     data = nTor.year,
                     control.compute = control$compute,
                     control.predictor = control$predictor)
out = data.frame(Year = nTor.year$Year,
                 QL = model$summary.fitted.values$"0.05quant",
                 QM = model$summary.fitted.values$"0.5quant",
                 QH = model$summary.fitted.values$"0.95quant",
                 nT = nTor.year$nT,
                 ST = AB)
p2OH = ggplot(out, aes(x = Year, y = QM)) +
  geom_line(color = 'blue') +
  geom_ribbon(aes(x = Year, ymax = QH, ymin = QL), color = "gray", alpha = .25) +
  geom_point(aes(x = Year, y = nT)) +
  ylab("Number of EF1+ Tornadoes") +
  theme_bw()

Spatial neighborhood definition as an inla graph

nb = poly2nb(spdf)
nb2INLA("tornb.inla", nb)
tornb.inla = inla.read.graph("tornb.inla")

Model with spatial term. Determine the model that gives lowest DIC. Then compare with same model without the spatial term.

formula = nT ~ f(ID, model = "besag", graph = tornb.inla, constr = TRUE) + 
                Ldensity + Ldensity:I(Year - 1991) +
                f(Year2, model = "iid")
model = inla(formula = formula, family = "nbinomial", E = area/2000,
                 data = popNtor,
                 quantiles = c(.05, .5, .95),
                 control.compute = control$compute,
                 control.predictor = control$predictor,
                 control.results = control$results,
                 control.family = control$family
                 )
summary(model)
## 
## Call:
## c("inla(formula = formula, family = \"nbinomial\", data = popNtor, ",  "    quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ",  "    control.predictor = control$predictor, control.family = control$family, ",  "    control.results = control$results)")
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          0.2239          6.0110          0.2869          6.5219 
## 
## Fixed effects:
##                            mean     sd 0.05quant 0.5quant 0.95quant
## (Intercept)             -2.0651 0.2978   -2.5557  -2.0648   -1.5753
## Ldensity                 0.0714 0.0461   -0.0051   0.0717    0.1466
## Ldensity:I(Year - 1991) -0.0031 0.0013   -0.0053  -0.0031   -0.0010
##                            mode kld
## (Intercept)             -2.0642   0
## Ldensity                 0.0724   0
## Ldensity:I(Year - 1991) -0.0031   0
## 
## Random effects:
## Name   Model
##  ID   Besags ICAR model 
## Year2   IID model 
## 
## Model hyperparameters:
##                                                       mean     sd
## size for the nbinomial observations (overdispersion) 3.393 0.9018
## Precision for ID                                     2.563 0.9626
## Precision for Year2                                  2.071 0.5782
##                                                      0.05quant 0.5quant
## size for the nbinomial observations (overdispersion)     2.179    3.257
## Precision for ID                                         1.361    2.380
## Precision for Year2                                      1.263    1.998
##                                                      0.95quant  mode
## size for the nbinomial observations (overdispersion)     5.070 2.995
## Precision for ID                                         4.378 2.059
## Precision for Year2                                      3.131 1.861
## 
## Expected number of effective parameters(std dev): 68.82(5.99)
## Number of equivalent replicates : 54.98 
## 
## Deviance Information Criterion: 3301.95
## Effective number of parameters: 69.38
## 
## Marginal Likelihood:  -1781.62 
## CPO and PIT are computed
## 
## Posterior marginals for linear predictor and fitted values computed

Model diagnostics

modPIT = model$cpo$pit - runif(length(model$cpo$cpo)) * model$cpo$cpo
goftest::ad.test(modPIT)
## 
##  Anderson-Darling test of goodness-of-fit
##  Null hypothesis: uniform distribution
## 
## data:  modPIT
## An = 0.2594, p-value = 0.9652
-mean(log(model$cpo$cpo))
## [1] 0.4363091
cor.test(popNtor$nT, model$summary.fitted.values$mean, conf.level = .9)
## 
##  Pearson's product-moment correlation
## 
## data:  popNtor$nT and model$summary.fitted.values$mean
## t = 22.5925, df = 3782, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
##  0.3210537 0.3681847
## sample estimates:
##       cor 
## 0.3448365
brier.score(popNtor[["nT"]], model[["summary.fitted.values"]])
## [1] 0.2118813

Model w/out spatial term

formula0 = nT ~ Ldensity + Ldensity:I(Year - 1991) +
                f(Year2, model = "iid")
model0 = inla(formula = formula0, family = "nbinomial", E = area/2000,
                 data = popNtor,
                 quantiles = c(.05, .5, .95),
                 control.compute = control$compute,
                 control.predictor = control$predictor,
                 control.results = control$results,
                 control.family = control$family
                 )
summary(model0)
## 
## Call:
## c("inla(formula = formula0, family = \"nbinomial\", data = popNtor, ",  "    quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ",  "    control.predictor = control$predictor, control.family = control$family, ",  "    control.results = control$results)")
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          0.1701          3.2173          0.2355          3.6229 
## 
## Fixed effects:
##                            mean     sd 0.05quant 0.5quant 0.95quant
## (Intercept)             -2.1447 0.2172   -2.5036  -2.1439   -1.7883
## Ldensity                 0.1010 0.0300    0.0514   0.1012    0.1502
## Ldensity:I(Year - 1991) -0.0030 0.0013   -0.0051  -0.0030   -0.0009
##                            mode kld
## (Intercept)             -2.1423   0
## Ldensity                 0.1015   0
## Ldensity:I(Year - 1991) -0.0030   0
## 
## Random effects:
## Name   Model
##  Year2   IID model 
## 
## Model hyperparameters:
##                                                       mean     sd
## size for the nbinomial observations (overdispersion) 2.533 0.6113
## Precision for Year2                                  2.080 0.5844
##                                                      0.05quant 0.5quant
## size for the nbinomial observations (overdispersion)     1.699    2.444
## Precision for Year2                                      1.266    2.005
##                                                      0.95quant  mode
## size for the nbinomial observations (overdispersion)     3.663 2.269
## Precision for Year2                                      3.153 1.863
## 
## Expected number of effective parameters(std dev): 36.73(1.491)
## Number of equivalent replicates : 103.02 
## 
## Deviance Information Criterion: 3363.54
## Effective number of parameters: 37.65
## 
## Marginal Likelihood:  -1730.59 
## CPO and PIT are computed
## 
## Posterior marginals for linear predictor and fitted values computed
spdf$RandomMean = model$summary.random$ID$mean
spdf$RandomQ50 = model$summary.random$ID$'0.5quant'
spdf$sd = model$summary.random$ID$sd
df = model$summary.random$ID
names(df) = c("ID", "mean", "sd", "QL", "QM", "QH", "mode", "kld")
df$QL = (exp(df$QL) - 1) * 100
df$QM = (exp(df$QM) - 1) * 100
df$QH = (exp(df$QH) - 1) * 100
df$Sig = sign(df$QL) == sign(df$QH)
table(df$Sig)
## 
## FALSE  TRUE 
##    64    24
spdf$Sig = df$Sig
CRS0 = CRS("+proj=longlat +datum=WGS84")
bc = getBordersAndCenters(spdf, CRS0)
df2 = bc$centers
df3 = cbind(df, df2)
names(df3)[5] = 'newfield'
borders.df = merge(bc$borders, df3[c("county", "newfield", "Sig")], by = "county")
borders2.df = borders.df[borders.df$Sig, ]
p3OH = ggplotSpdata(spdf, "RandomQ50", tfun = function(x) round((exp(x) - 1) * 100)) +
        scale_fill_gradient2("Tornado Occurrence\n[% of State Avg]", 
            low = muted("blue"), mid = "white", high = muted("red"),
            midpoint = 0, space = "rgb", na.value = "grey50", guide = "colourbar") +
  geom_polygon(data = borders2.df[borders2.df$newfield > 0, ],
               aes(x = long, y = lat, group = group, fill = newfield),
               color = muted("red"), size = 2) +
  geom_polygon(data = borders2.df[borders2.df$newfield < 0, ],
               aes(x = long, y = lat, group = group, fill = newfield),
               color = muted("blue"), size = 2) +
  geom_text(data = df3[df3$Sig,], 
              aes(x = long, y = lat, group = NULL, label = round(newfield)), 
              vjust = .5, size = 3, color = 'black')
Elev.data = over(countiesun.sp, Elev1, returnList = TRUE)
Elev.df = data.frame(county = rep(county, sapply(Elev.data, nrow)),
                     Elev = unlist(Elev.data), 
                     ID = rep(spdf$ID, sapply(Elev.data, nrow)),
                     stringsAsFactors = FALSE)
t.test(Elev.df$Elev, conf.level = .9)
## 
##  One Sample t-test
## 
## data:  Elev.df$Elev
## t = 4404.671, df = 653144, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 90 percent confidence interval:
##  279.4523 279.6611
## sample estimates:
## mean of x 
##  279.5567
CE.df = Elev.df %>%
  group_by(ID) %>%
  summarize(elev = mean(Elev, na.rm = TRUE),
            elevS = sd(Elev, na.rm = TRUE),
            elevK = kurtosis(Elev, na.rm = TRUE),
            elevCV = elevS/elev)
all(spdf$ID == CE.df$ID)
## [1] TRUE
spdf@data[colnames(CE.df)] = CE.df
popNtor2 = merge(popNtor, CE.df, by = "ID")
cor.test(spdf$nT, spdf$elevS, conf.level = .9)
## 
##  Pearson's product-moment correlation
## 
## data:  spdf$nT and spdf$elevS
## t = -0.6093, df = 86, p-value = 0.544
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
##  -0.2393272  0.1122831
## sample estimates:
##        cor 
## -0.0655567
formula = nT ~ elevS +
               f(ID, model = "besag", graph = tornb.inla, constr = TRUE) + 
               Ldensity + Ldensity:I(Year - 1991) +
               f(Year2, model = "iid")
model = inla(formula = formula, family = "nbinomial", E = area/2000,
                 data = popNtor2,
                 quantiles = c(.05, .5, .95),
                 control.compute = control$compute,
                 control.predictor = control$predictor,
                 control.results = control$results,
                 control.family = control$family
                 )
summary(model)
## 
## Call:
## c("inla(formula = formula, family = \"nbinomial\", data = popNtor2, ",  "    quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ",  "    control.predictor = control$predictor, control.family = control$family, ",  "    control.results = control$results)")
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          0.2482          6.4332          0.3003          6.9817 
## 
## Fixed effects:
##                            mean     sd 0.05quant 0.5quant 0.95quant
## (Intercept)             -2.0692 0.3242   -2.6039  -2.0685   -1.5367
## elevS                    0.0003 0.0079   -0.0126   0.0003    0.0133
## Ldensity                 0.0706 0.0483   -0.0095   0.0710    0.1494
## Ldensity:I(Year - 1991) -0.0031 0.0013   -0.0053  -0.0031   -0.0010
##                            mode kld
## (Intercept)             -2.0672   0
## elevS                    0.0002   0
## Ldensity                 0.0718   0
## Ldensity:I(Year - 1991) -0.0031   0
## 
## Random effects:
## Name   Model
##  ID   Besags ICAR model 
## Year2   IID model 
## 
## Model hyperparameters:
##                                                       mean     sd
## size for the nbinomial observations (overdispersion) 3.389 0.9004
## Precision for ID                                     2.485 0.9268
## Precision for Year2                                  2.072 0.5777
##                                                      0.05quant 0.5quant
## size for the nbinomial observations (overdispersion)     2.176    3.253
## Precision for ID                                         1.322    2.310
## Precision for Year2                                      1.263    2.000
##                                                      0.95quant  mode
## size for the nbinomial observations (overdispersion)     5.063 2.991
## Precision for ID                                         4.232 2.003
## Precision for Year2                                      3.130 1.864
## 
## Expected number of effective parameters(std dev): 69.83(5.945)
## Number of equivalent replicates : 54.19 
## 
## Deviance Information Criterion: 3302.92
## Effective number of parameters: 70.33
## 
## Marginal Likelihood:  -1789.94 
## CPO and PIT are computed
## 
## Posterior marginals for linear predictor and fitted values computed
p4OH = ggplotmargin(model, type = "fixed", effect = "elevS", xlab = "") +  
  xlab(expression("Elevation S.D. Coefficient"~"["*m^{-1}*"]"))
popNtor3 = merge(popNtor2, cwa.df, by = "county")
formula = nT ~ cwa +
               f(ID, model = "besag", graph = tornb.inla, constr = TRUE) + 
               Ldensity + I(Year - 1991) + 
               f(Year2, model = "iid")
model = inla(formula = formula, family = "nbinomial", E = area/2000,
                 data = popNtor3,
                 quantiles = c(.05, .5, .95),
                 control.compute = control$compute,
                 control.predictor = control$predictor,
                 control.results = control$results,
                 control.family = control$family
                 )
summary(model)
## 
## Call:
## c("inla(formula = formula, family = \"nbinomial\", data = popNtor3, ",  "    quantiles = c(0.05, 0.5, 0.95), E = area/2000, control.compute = control$compute, ",  "    control.predictor = control$predictor, control.family = control$family, ",  "    control.results = control$results)")
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          0.2672          6.3503          0.3030          6.9205 
## 
## Fixed effects:
##                   mean     sd 0.05quant 0.5quant 0.95quant    mode kld
## (Intercept)    -1.6914 0.3113   -2.2057  -1.6902   -1.1816 -1.6877   0
## cwaILN         -0.3197 0.2148   -0.6822  -0.3139    0.0232 -0.3007   0
## cwaIWX          0.1501 0.2522   -0.2592   0.1470    0.5705  0.1406   0
## cwaPBZ         -0.7534 0.2635   -1.1837  -0.7553   -0.3167 -0.7584   0
## cwaRLX         -1.1378 0.3236   -1.6744  -1.1351   -0.6108 -1.1283   0
## Ldensity        0.0604 0.0415   -0.0080   0.0605    0.1284  0.0607   0
## I(Year - 1991) -0.0184 0.0094   -0.0340  -0.0184   -0.0030 -0.0183   0
## 
## Random effects:
## Name   Model
##  ID   Besags ICAR model 
## Year2   IID model 
## 
## Model hyperparameters:
##                                                       mean     sd
## size for the nbinomial observations (overdispersion) 3.011 0.7672
## Precision for ID                                     7.649 6.2485
## Precision for Year2                                  2.046 0.5721
##                                                      0.05quant 0.5quant
## size for the nbinomial observations (overdispersion)     1.972    2.897
## Precision for ID                                         2.353    5.806
## Precision for Year2                                      1.246    1.975
##                                                      0.95quant  mode
## size for the nbinomial observations (overdispersion)     4.433 2.676
## Precision for ID                                        18.980 3.774
## Precision for Year2                                      3.095 1.840
## 
## Expected number of effective parameters(std dev): 59.66(7.472)
## Number of equivalent replicates : 64.17 
## 
## Deviance Information Criterion: 3352.09
## Effective number of parameters: 61.15
## 
## Marginal Likelihood:  -1815.33 
## CPO and PIT are computed
## 
## Posterior marginals for linear predictor and fitted values computed

Four-panel plots

source("multiplot.R")
mat = matrix(1:4, nrow = 2, byrow = TRUE)
p1IL = p1IL + ggtitle("a Illinois") + theme(plot.title = element_text(hjust = 0, size = 15))
p1MS = p1MS + ggtitle("b Mississippi") + theme(plot.title = element_text(hjust = 0, size = 15))
p1SD = p1SD + ggtitle("c South Dakota") + theme(plot.title = element_text(hjust = 0, size = 15))
p1OH = p1OH + ggtitle("d Ohio") + theme(plot.title = element_text(hjust = 0, size = 15))
multiplot(p1IL, p1MS, p1SD, p1OH, layout = mat)

p2IL = p2IL + ggtitle("a Illinois") + theme(plot.title = element_text(hjust = 0, size = 15))
p2MS = p2MS + ggtitle("b Mississippi") + theme(plot.title = element_text(hjust = 0, size = 15))
p2SD = p2SD + ggtitle("c South Dakota") + theme(plot.title = element_text(hjust = 0, size = 15))
p2OH = p2OH + ggtitle("d Ohio") + theme(plot.title = element_text(hjust = 0, size = 15))
multiplot(p2IL, p2MS, p2SD, p2OH, layout = mat)

p3IL = p3IL + ggtitle("a Illinois") + theme(plot.title = element_text(hjust = 0, size = 15))
p3MS = p3MS + ggtitle("b Mississippi") + theme(plot.title = element_text(hjust = 0, size = 15))
p3SD = p3SD + ggtitle("c South Dakota") + theme(plot.title = element_text(hjust = 0, size = 15))
p3OH = p3OH + ggtitle("d Ohio") + theme(plot.title = element_text(hjust = 0, size = 15))
multiplot(p3IL, p3MS, p3SD, p3OH, layout = mat)

p4IL = p4IL + ggtitle("a Illinois") + theme(plot.title = element_text(hjust = 0, size = 15))
p4MS = p4MS + ggtitle("b Mississippi") + theme(plot.title = element_text(hjust = 0, size = 15))
p4SD = p4SD + ggtitle("c South Dakota") + theme(plot.title = element_text(hjust = 0, size = 15))
p4OH = p4OH + ggtitle("d Ohio") + theme(plot.title = element_text(hjust = 0, size = 15))
multiplot(p4IL, p4MS, p4SD, p4OH, layout = mat)